author     Linus Torvalds <torvalds@linux-foundation.org>   2017-07-05 22:31:59 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-07-05 22:31:59 +0300
commit     5518b69b76680a4f2df96b1deca260059db0c2de (patch)
tree       f33cd1519c8efb4590500f2f9617400be233238c /net
parent     8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff)
parent     0e72582270c07850b92cac351c8b97d4f9c123b9 (diff)
download   linux-5518b69b76680a4f2df96b1deca260059db0c2de.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Reasonably busy this cycle, but perhaps not as busy as in the 4.12
merge window:
1) Several optimizations for UDP processing under high load from
Paolo Abeni.
2) Support pacing internally in TCP, for cases where using the sch_fq
packet scheduler is not practical. From Eric Dumazet.
3) Support multiple filter chains per qdisc, from Jiri Pirko.
4) Move to 1ms TCP timestamp clock, from Eric Dumazet.
5) Add batch dequeueing to vhost_net, from Jason Wang.
6) Flesh out SCTP checksum offload support more completely, from
Davide Caratti.
7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
Neira Ayuso, and Matthias Schiffer.
8) Add devlink support to nfp driver, from Simon Horman.
9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
Prabhu.
10) Add stack depth tracking to BPF verifier and use this information
in the various eBPF JITs. From Alexei Starovoitov.
11) Support XDP on qed device VFs, from Yuval Mintz.
12) Introduce BPF PROG ID for better introspection of installed BPF
programs. From Martin KaFai Lau.
13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.
14) For loads, allow narrower accesses in bpf verifier checking, from
Yonghong Song.
15) Support MIPS in the BPF selftests and samples infrastructure, the
MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
Daney.
16) Support kernel based TLS, from Dave Watson and others.
17) Completely remove DST garbage collection, from Wei Wang.
18) Allow installing TCP MD5 rules using prefixes, from Ivan
Delalande.
19) Add XDP support to Intel i40e driver, from Björn Töpel.
20) Add support for TC flower offload in nfp driver, from Simon
Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
Kicinski, and Bert van Leeuwen.
21) IPSEC offloading support in mlx5, from Ilan Tayari.
22) Add HW PTP support to macb driver, from Rafal Ozieblo.
23) Networking refcount_t conversions, From Elena Reshetova.
24) Add sock_ops support to BPF, from Lawrence Brakmo. This is useful
for tuning the TCP sockopt settings of a group of applications,
currently via CGROUPs"
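Item 24 is the most programmable of these: a sock_ops BPF program is attached to a cgroup and consulted at fixed points in a TCP connection's life. A minimal sketch in the style of the 4.13-era samples/bpf programs; the function name and reply values are illustrative, and the SEC() annotation assumes the usual bpf_helpers.h loader conventions:

    /* Consulted by TCP at the operations identified by skops->op;
     * writing skops->reply returns a value to the kernel, and -1
     * keeps the kernel default.
     */
    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("sockops")
    int tune_tcp(struct bpf_sock_ops *skops)
    {
            int rv = -1;

            switch (skops->op) {
            case BPF_SOCK_OPS_TIMEOUT_INIT:
                    rv = 10;        /* initial SYN RTO of 10 s */
                    break;
            case BPF_SOCK_OPS_RWND_INIT:
                    rv = 40;        /* initial advertised window, in MSS */
                    break;
            }
            skops->reply = rv;
            return 1;
    }

    char _license[] SEC("license") = "GPL";

Such a program is attached to a cgroup2 fd with bpf(BPF_PROG_ATTACH, ...) using the BPF_CGROUP_SOCK_OPS attach type.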
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
cxgb4: Support for get_ts_info ethtool method
cxgb4: Add PTP Hardware Clock (PHC) support
cxgb4: time stamping interface for PTP
nfp: default to chained metadata prepend format
nfp: remove legacy MAC address lookup
nfp: improve order of interfaces in breakout mode
net: macb: remove extraneous return when MACB_EXT_DESC is defined
bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
bpf: fix return in load_bpf_file
mpls: fix rtm policy in mpls_getroute
net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
...
Diffstat (limited to 'net')
440 files changed, 10808 insertions, 5111 deletions
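Nearly all of the mechanical churn in the diff below stems from one tree-wide change: skb_push(), skb_put(), skb_pull() and their __ variants now return void * rather than unsigned char *, so the casts at call sites can simply go away. The shape of the conversion, with a hypothetical header struct:

    /* before: the unsigned char * return forced a cast */
    struct foo_hdr *h = (struct foo_hdr *)skb_put(skb, sizeof(*h));

    /* after: void * converts implicitly, as with kmalloc() */
    struct foo_hdr *h = skb_put(skb, sizeof(*h));

The same series added skb_put_data(), skb_put_zero() and __skb_put_u8() for the common memcpy/memset-onto-skb_put patterns; those show up file by file below.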
diff --git a/net/802/fc.c b/net/802/fc.c
index 1bb496ea997e..058a9f708918 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -49,7 +49,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
         struct fcllc *fcllc;
 
         hdr_len = sizeof(struct fch_hdr) + sizeof(struct fcllc);
-        fch = (struct fch_hdr *)skb_push(skb, hdr_len);
+        fch = skb_push(skb, hdr_len);
         fcllc = (struct fcllc *)(fch+1);
         fcllc->dsap = fcllc->ssap = EXTENDED_SAP;
         fcllc->llc = UI_CMD;
@@ -59,7 +59,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
     else
     {
         hdr_len = sizeof(struct fch_hdr);
-        fch = (struct fch_hdr *)skb_push(skb, hdr_len);
+        fch = skb_push(skb, hdr_len);
     }
     if(saddr)
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 6356623fc238..90f1416567a1 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -58,7 +58,7 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
     if(type != ETH_P_IP && type != ETH_P_IPV6 && type != ETH_P_ARP)
         hl=FDDI_K_8022_HLEN-3;
-    fddi = (struct fddihdr *)skb_push(skb, hl);
+    fddi = skb_push(skb, hl);
     fddi->fc = FDDI_FC_K_ASYNC_LLC_DEF;
     if(type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
     {
diff --git a/net/802/garp.c b/net/802/garp.c
index b38ee6dcba45..2dac647ff420 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -221,7 +221,7 @@ static int garp_pdu_init(struct garp_applicant *app)
     skb->protocol = htons(ETH_P_802_2);
     skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE);
 
-    gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp));
+    gp = __skb_put(skb, sizeof(*gp));
     put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol);
 
     app->pdu = skb;
@@ -232,7 +232,7 @@ static int garp_pdu_append_end_mark(struct garp_applicant *app)
 {
     if (skb_tailroom(app->pdu) < sizeof(u8))
         return -1;
-    *(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK;
+    __skb_put_u8(app->pdu, GARP_END_MARK);
     return 0;
 }
 
@@ -268,7 +268,7 @@ static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype)
 
     if (skb_tailroom(app->pdu) < sizeof(*gm))
         return -1;
-    gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm));
+    gm = __skb_put(app->pdu, sizeof(*gm));
     gm->attrtype = attrtype;
     garp_cb(app->pdu)->cur_type = attrtype;
     return 0;
@@ -299,7 +299,7 @@ again:
     len = sizeof(*ga) + attr->dlen;
     if (skb_tailroom(app->pdu) < len)
         goto queue;
-    ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len);
+    ga = __skb_put(app->pdu, len);
     ga->len   = len;
     ga->event = event;
     memcpy(ga->data, attr->data, attr->dlen);
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 4460606e9c36..690308b9b94a 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -47,7 +47,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
                         unsigned short type,
                         const void *daddr, const void *saddr, unsigned int len)
 {
-    struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
+    struct hippi_hdr *hip = skb_push(skb, HIPPI_HLEN);
     struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
 
     if (!len){
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 72db2785ef2c..be4dd3165347 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -311,7 +311,7 @@ static int mrp_pdu_init(struct mrp_applicant *app)
     skb_reset_network_header(skb);
     skb_reset_transport_header(skb);
 
-    ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+    ph = __skb_put(skb, sizeof(*ph));
     ph->version = app->app->version;
 
     app->pdu = skb;
@@ -324,7 +324,7 @@ static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
     if (skb_tailroom(app->pdu) < sizeof(*endmark))
         return -1;
-    endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+    endmark = __skb_put(app->pdu, sizeof(*endmark));
     put_unaligned(MRP_END_MARK, endmark);
     return 0;
 }
@@ -368,7 +368,7 @@ static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
     if (skb_tailroom(app->pdu) < sizeof(*mh))
         return -1;
-    mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh));
+    mh = __skb_put(app->pdu, sizeof(*mh));
     mh->attrtype = attrtype;
     mh->attrlen = attrlen;
     mrp_cb(app->pdu)->mh = mh;
@@ -382,8 +382,7 @@ static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
     if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen)
         return -1;
-    vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu,
-                                              sizeof(*vah) + attrlen);
+    vah = __skb_put(app->pdu, sizeof(*vah) + attrlen);
     put_unaligned(0, &vah->lenflags);
     memcpy(vah->firstattrvalue, firstattrvalue, attrlen);
     mrp_cb(app->pdu)->vah = vah;
@@ -435,7 +434,7 @@ again:
     if (!pos) {
         if (skb_tailroom(app->pdu) < sizeof(u8))
             goto queue;
-        vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+        vaevents = __skb_put(app->pdu, sizeof(u8));
     } else {
         vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
     }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index abc5f400fc71..f7e83f6d2e64 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -58,7 +58,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
     int rc;
 
     if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
-        vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+        vhdr = skb_push(skb, VLAN_HLEN);
 
         vlan_tci = vlan->vlan_id;
         vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
@@ -797,12 +797,6 @@ static const struct net_device_ops vlan_netdev_ops = {
     .ndo_netpoll_cleanup    = vlan_dev_netpoll_cleanup,
 #endif
     .ndo_fix_features       = vlan_dev_fix_features,
-    .ndo_fdb_add            = switchdev_port_fdb_add,
-    .ndo_fdb_del            = switchdev_port_fdb_del,
-    .ndo_fdb_dump           = switchdev_port_fdb_dump,
-    .ndo_bridge_setlink     = switchdev_port_bridge_setlink,
-    .ndo_bridge_getlink     = switchdev_port_bridge_getlink,
-    .ndo_bridge_dellink     = switchdev_port_bridge_dellink,
     .ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
     .ndo_get_iflink         = vlan_dev_get_iflink,
 };
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 9c94aad153b3..5e831de3103e 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -39,7 +39,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr)
                 NULL);
 }
 
-static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
+static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
+                         struct netlink_ext_ack *extack)
 {
     struct ifla_vlan_flags *flags;
     u16 id;
@@ -87,8 +88,9 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
     return 0;
 }
 
-static int vlan_changelink(struct net_device *dev,
-                           struct nlattr *tb[], struct nlattr *data[])
+static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
+                           struct nlattr *data[],
+                           struct netlink_ext_ack *extack)
 {
     struct ifla_vlan_flags *flags;
     struct ifla_vlan_qos_mapping *m;
@@ -115,7 +117,8 @@ static int vlan_changelink(struct net_device *dev,
 }
 
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
-                        struct nlattr *tb[], struct nlattr *data[])
+                        struct nlattr *tb[], struct nlattr *data[],
+                        struct netlink_ext_ack *extack)
 {
     struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
     struct net_device *real_dev;
@@ -153,7 +156,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
     else if (dev->mtu > max_mtu)
         return -EINVAL;
 
-    err = vlan_changelink(dev, tb, data);
+    err = vlan_changelink(dev, tb, data, extack);
     if (err < 0)
         return err;
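The vlan_netlink.c hunk just above is item 7 from the pull message in action: rtnl_link_ops validate/changelink/newlink callbacks grew a struct netlink_ext_ack * argument, through which a driver can return a human-readable failure reason alongside the errno. A hedged sketch of the intended use (the message string is illustrative):

    static int vlan_validate_sketch(struct nlattr *tb[], struct nlattr *data[],
                                    struct netlink_ext_ack *extack)
    {
            if (!data) {
                    /* carried back to userspace in the extended ACK */
                    NL_SET_ERR_MSG(extack, "VLAN attributes are required");
                    return -EINVAL;
            }
            return 0;
    }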
diff --git a/net/Kconfig b/net/Kconfig
index 102f781a0131..7d57ef34b79c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -55,6 +55,7 @@ menu "Networking options"
 
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
+source "net/tls/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 9086ffbb5085..bed80fa398b7 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_LLC)       += llc/
 obj-$(CONFIG_NET)       += ethernet/ 802/ sched/ netlink/ bpf/
 obj-$(CONFIG_NETFILTER) += netfilter/
 obj-$(CONFIG_INET)      += ipv4/
+obj-$(CONFIG_TLS)       += tls/
 obj-$(CONFIG_XFRM)      += xfrm/
 obj-$(CONFIG_UNIX)      += unix/
 obj-$(CONFIG_NET)       += ipv6/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 465cc24b41e5..5d035c1f1156 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1529,7 +1529,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
          * The push leaves us with a ddephdr not an shdr, and
          * handily the port bytes in the right place preset.
          */
-        ddp = (struct ddpehdr *) skb_push(skb, sizeof(*ddp) - 4);
+        ddp = skb_push(skb, sizeof(*ddp) - 4);
 
         /* Now fill in the long header */
 
@@ -1647,7 +1647,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
     SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
 
-    ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr));
+    ddp = skb_put(skb, sizeof(struct ddpehdr));
     ddp->deh_len_hops  = htons(len + sizeof(*ddp));
     ddp->deh_dnet  = usat->sat_addr.s_net;
     ddp->deh_snet  = at->src_net;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fca84e111c89..4e111196f902 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 
     ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
     pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-    atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
     ATM_SKB(skb)->atm_options = atmvcc->atm_options;
     dev->stats.tx_packets++;
     dev->stats.tx_bytes += skb->len;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ec527b62f79d..f271a7bcf5b2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -60,7 +60,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
     skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
     if (!skb)
         return -ENOMEM;
-    ctrl = (struct atmarp_ctrl *)skb_put(skb, sizeof(struct atmarp_ctrl));
+    ctrl = skb_put(skb, sizeof(struct atmarp_ctrl));
     ctrl->type = type;
     ctrl->itf_num = itf;
     ctrl->ip = ip;
@@ -137,11 +137,11 @@ static int neigh_check_cb(struct neighbour *n)
     if (entry->vccs || time_before(jiffies, entry->expires))
         return 0;
 
-    if (atomic_read(&n->refcnt) > 1) {
+    if (refcount_read(&n->refcnt) > 1) {
         struct sk_buff *skb;
 
         pr_debug("destruction postponed with ref %d\n",
-                 atomic_read(&n->refcnt));
+                 refcount_read(&n->refcnt));
 
         while ((skb = skb_dequeue(&n->arp_queue)) != NULL)
             dev_kfree_skb(skb);
@@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
         memcpy(here, llc_oui, sizeof(llc_oui));
         ((__be16 *) here)[3] = skb->protocol;
     }
-    atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
     ATM_SKB(skb)->atm_options = vcc->atm_options;
     entry->vccs->last_use = jiffies;
     pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
@@ -767,7 +767,7 @@ static void atmarp_info(struct seq_file *seq, struct neighbour *n,
             seq_printf(seq, "(resolving)\n");
         else
             seq_printf(seq, "(expired, ref %d)\n",
-                       atomic_read(&entry->neigh->refcnt));
+                       refcount_read(&entry->neigh->refcnt));
     } else if (!svc) {
         seq_printf(seq, "%d.%d.%d\n",
                    clip_vcc->vcc->dev->number,
diff --git a/net/atm/common.c b/net/atm/common.c
index f06422f4108d..8a4f99114cd2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk)
         printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
                __func__, atomic_read(&sk->sk_rmem_alloc));
 
-    if (atomic_read(&sk->sk_wmem_alloc))
+    if (refcount_read(&sk->sk_wmem_alloc))
         printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
-               __func__, atomic_read(&sk->sk_wmem_alloc));
+               __func__, refcount_read(&sk->sk_wmem_alloc));
 }
 
 static void vcc_def_wakeup(struct sock *sk)
@@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk)
     struct atm_vcc *vcc = atm_sk(sk);
 
     return (vcc->qos.txtp.max_sdu +
-            atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+            refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
 }
 
 static void vcc_write_space(struct sock *sk)
@@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i
     memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
     memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
     vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
-    atomic_set(&sk->sk_wmem_alloc, 1);
+    refcount_set(&sk->sk_wmem_alloc, 1);
     atomic_set(&sk->sk_rmem_alloc, 0);
     vcc->push = NULL;
     vcc->pop = NULL;
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
         goto out;
     }
     pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-    atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 
     skb->dev = NULL; /* for paths shared with net_device interfaces */
     ATM_SKB(skb)->atm_options = vcc->atm_options;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09cfe87f0a44..093fe8707731 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -101,12 +101,12 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc);
 /* must be done under lec_arp_lock */
 static inline void lec_arp_hold(struct lec_arp_table *entry)
 {
-    atomic_inc(&entry->usage);
+    refcount_inc(&entry->usage);
 }
 
 static inline void lec_arp_put(struct lec_arp_table *entry)
 {
-    if (atomic_dec_and_test(&entry->usage))
+    if (refcount_dec_and_test(&entry->usage))
         kfree(entry);
 }
 
@@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
     ATM_SKB(skb)->vcc = vcc;
     ATM_SKB(skb)->atm_options = vcc->atm_options;
 
-    atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
     if (vcc->send(vcc, skb) < 0) {
         dev->stats.tx_dropped++;
         return;
@@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
     int i;
     char *tmp; /* FIXME */
 
-    atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+    WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
     mesg = (struct atmlec_msg *)skb->data;
     tmp = skb->data;
     tmp += sizeof(struct atmlec_msg);
@@ -1564,7 +1564,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
     to_return->last_used = jiffies;
     to_return->priv = priv;
     skb_queue_head_init(&to_return->tx_wait);
-    atomic_set(&to_return->usage, 1);
+    refcount_set(&to_return->usage, 1);
     return to_return;
 }
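The atomic_t to refcount_t conversions threaded through the ATM code (and much of the rest of this diff; item 23 in the pull message) change the type, not the lifecycle: refcount_t refuses to increment from zero and saturates instead of wrapping on overflow, turning refcount bugs into loud warnings rather than use-after-frees. The idiom, as a minimal sketch:

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct entry {
            refcount_t usage;
            /* ... payload ... */
    };

    /* allocation site:        refcount_set(&e->usage, 1);
     * each additional holder: refcount_inc(&e->usage);
     */
    static void entry_put(struct entry *e)
    {
            if (refcount_dec_and_test(&e->usage))
                    kfree(e);       /* last reference gone */
    }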
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index ec67435a40a6..d923f53812a3 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -47,7 +47,7 @@ struct lec_arp_table {
                      * the length of the tlvs array
                      */
     struct sk_buff_head tx_wait;    /* wait queue for outgoing packets */
-    atomic_t usage;                 /* usage count */
+    refcount_t usage;               /* usage count */
 };
 
 /*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a190800572bd..680a4b9095a1 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
                     sizeof(struct llc_snap_hdr));
     }
 
-    atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
     ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
     entry->shortcut->send(entry->shortcut, skb);
     entry->packets_fwded++;
@@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
     struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
     struct k_message *mesg = (struct k_message *)skb->data;
 
-    atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+    WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 
     if (mpc == NULL) {
         pr_info("no mpc found\n");
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index a89fdebeffda..4ccaa16b1eb1 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -40,7 +40,7 @@ static in_cache_entry *in_cache_get(__be32 dst_ip,
     entry = client->in_cache;
     while (entry != NULL) {
         if (entry->ctrl_info.in_dst_ip == dst_ip) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_bh(&client->ingress_lock);
             return entry;
         }
@@ -61,7 +61,7 @@ static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip,
     entry = client->in_cache;
     while (entry != NULL) {
         if ((entry->ctrl_info.in_dst_ip & mask) == (dst_ip & mask)) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_bh(&client->ingress_lock);
             return entry;
         }
@@ -82,7 +82,7 @@ static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc,
     entry = client->in_cache;
     while (entry != NULL) {
         if (entry->shortcut == vcc) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_bh(&client->ingress_lock);
             return entry;
         }
@@ -105,7 +105,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
 
     dprintk("adding an ingress entry, ip = %pI4\n", &dst_ip);
 
-    atomic_set(&entry->use, 1);
+    refcount_set(&entry->use, 1);
     dprintk("new_in_cache_entry: about to lock\n");
     write_lock_bh(&client->ingress_lock);
     entry->next = client->in_cache;
@@ -121,7 +121,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
     entry->count = 1;
     entry->entry_state = INGRESS_INVALID;
     entry->ctrl_info.holding_time = HOLDING_TIME_DEFAULT;
-    atomic_inc(&entry->use);
+    refcount_inc(&entry->use);
 
     write_unlock_bh(&client->ingress_lock);
     dprintk("new_in_cache_entry: unlocked\n");
@@ -178,7 +178,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
 
 static void in_cache_put(in_cache_entry *entry)
 {
-    if (atomic_dec_and_test(&entry->use)) {
+    if (refcount_dec_and_test(&entry->use)) {
         memset(entry, 0, sizeof(in_cache_entry));
         kfree(entry);
     }
@@ -339,7 +339,7 @@ static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id,
     entry = mpc->eg_cache;
     while (entry != NULL) {
         if (entry->ctrl_info.cache_id == cache_id) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_irq(&mpc->egress_lock);
             return entry;
         }
@@ -360,7 +360,7 @@ static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc)
     entry = mpc->eg_cache;
     while (entry != NULL) {
         if (entry->ctrl_info.tag == tag) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_irqrestore(&mpc->egress_lock, flags);
             return entry;
         }
@@ -382,7 +382,7 @@ static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc,
     entry = mpc->eg_cache;
     while (entry != NULL) {
         if (entry->shortcut == vcc) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_irqrestore(&mpc->egress_lock, flags);
             return entry;
         }
@@ -402,7 +402,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr,
     entry = mpc->eg_cache;
     while (entry != NULL) {
         if (entry->latest_ip_addr == ipaddr) {
-            atomic_inc(&entry->use);
+            refcount_inc(&entry->use);
             read_unlock_irq(&mpc->egress_lock);
             return entry;
         }
@@ -415,7 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr,
 
 static void eg_cache_put(eg_cache_entry *entry)
 {
-    if (atomic_dec_and_test(&entry->use)) {
+    if (refcount_dec_and_test(&entry->use)) {
         memset(entry, 0, sizeof(eg_cache_entry));
         kfree(entry);
     }
@@ -468,7 +468,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
     dprintk("adding an egress entry, ip = %pI4, this should be our IP\n",
             &msg->content.eg_info.eg_dst_ip);
 
-    atomic_set(&entry->use, 1);
+    refcount_set(&entry->use, 1);
     dprintk("new_eg_cache_entry: about to lock\n");
     write_lock_irq(&client->egress_lock);
     entry->next = client->eg_cache;
@@ -484,7 +484,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
     dprintk("new_eg_cache_entry cache_id %u\n",
             ntohl(entry->ctrl_info.cache_id));
     dprintk("mps_ip = %pI4\n", &entry->ctrl_info.mps_ip);
-    atomic_inc(&entry->use);
+    refcount_inc(&entry->use);
 
     write_unlock_irq(&client->egress_lock);
     dprintk("new_eg_cache_entry: unlocked\n");
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 8e5f78cf0be1..30fe34841ced 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -6,6 +6,7 @@
 #include <linux/atm.h>
 #include <linux/atmdev.h>
 #include <linux/atmmpc.h>
+#include <linux/refcount.h>
 
 struct mpoa_client;
 
@@ -25,7 +26,7 @@ typedef struct in_cache_entry {
     struct atm_vcc *shortcut;
     uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN];
     struct in_ctrl_info ctrl_info;
-    atomic_t use;
+    refcount_t use;
 } in_cache_entry;
 
 struct in_cache_ops{
@@ -58,7 +59,7 @@ typedef struct eg_cache_entry{
     uint16_t entry_state;
     __be32 latest_ip_addr; /* The src IP address of the last packet */
     struct eg_ctrl_info ctrl_info;
-    atomic_t use;
+    refcount_t use;
 } eg_cache_entry;
 
 struct eg_cache_ops{
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index c4e09846d1de..21d9d341a619 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
         return 1;
     }
 
-    atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+    refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
     ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
     pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
              skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
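sk_wmem_alloc is the one conversion here that needs a second look: vcc_create() initializes it to 1, and that bias is only dropped when the socket is finally freed, so a completion path that subtracts skb->truesize must never observe zero. That is exactly what the WARN_ON() wrappers in lec.c, mpc.c and the files below encode:

    /* refcount_sub_and_test() returns true only on reaching zero, which
     * here would mean the +1 socket bias was lost - a bug worth a WARN.
     */
    WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));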
diff --git a/net/atm/proc.c b/net/atm/proc.c
index bbb6461a4b7f..4caca2a90ec4 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -61,7 +61,7 @@ static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev)
     add_stats(seq, "0", &dev->stats.aal0);
     seq_puts(seq, "  ");
     add_stats(seq, "5", &dev->stats.aal5);
-    seq_printf(seq, "\t[%d]", atomic_read(&dev->refcnt));
+    seq_printf(seq, "\t[%d]", refcount_read(&dev->refcnt));
     seq_putc(seq, '\n');
 }
 
@@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc)
                vcc->flags, sk->sk_err,
                sk_wmem_alloc_get(sk), sk->sk_sndbuf,
                sk_rmem_alloc_get(sk), sk->sk_rcvbuf,
-               atomic_read(&sk->sk_refcnt));
+               refcount_read(&sk->sk_refcnt));
 }
 
 static void svc_info(struct seq_file *seq, struct atm_vcc *vcc)
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2e17e97a7a8b..821c0797553d 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
 
     pr_debug("(%d) %d -= %d\n",
              vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
-    atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+    WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
     dev_kfree_skb_any(skb);
     sk->sk_write_space(sk);
 }
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 0447d5d0b639..918244757b7d 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -109,7 +109,7 @@ struct atm_dev *atm_dev_register(const char *type, struct device *parent,
     else
         memset(&dev->flags, 0, sizeof(dev->flags));
     memset(&dev->stats, 0, sizeof(dev->stats));
-    atomic_set(&dev->refcnt, 1);
+    refcount_set(&dev->refcnt, 1);
 
     if (atm_proc_dev_register(dev) < 0) {
         pr_err("atm_proc_dev_register failed for dev %s\n", type);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index adb6e3d21b1e..983c3a21a133 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
     struct sock *sk;
 
     msg = (struct atmsvc_msg *) skb->data;
-    atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+    WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
     vcc = *(struct atm_vcc **) &msg->vcc;
     pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
     sk = sk_atm(vcc);
@@ -150,8 +150,7 @@ void sigd_enq2(struct atm_vcc *vcc, enum atmsvc_msg_type type,
     pr_debug("%d (0x%p)\n", (int)type, vcc);
     while (!(skb = alloc_skb(sizeof(struct atmsvc_msg), GFP_KERNEL)))
         schedule();
-    msg = (struct atmsvc_msg *)skb_put(skb, sizeof(struct atmsvc_msg));
-    memset(msg, 0, sizeof(*msg));
+    msg = skb_put_zero(skb, sizeof(struct atmsvc_msg));
     msg->type = type;
     *(struct atm_vcc **) &msg->vcc = vcc;
     *(struct atm_vcc **) &msg->listen_vcc = listen_vcc;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index b7c486752b3a..f3f9d18891de 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -510,7 +510,7 @@ ax25_cb *ax25_create_cb(void)
     if ((ax25 = kzalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL)
         return NULL;
 
-    atomic_set(&ax25->refcount, 1);
+    refcount_set(&ax25->refcount, 1);
 
     skb_queue_head_init(&ax25->write_queue);
     skb_queue_head_init(&ax25->frag_queue);
@@ -1562,7 +1562,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
     /* Add the PID if one is not supplied by the user in the skb */
     if (!ax25->pidincl)
-        *skb_push(skb, 1) = sk->sk_protocol;
+        *(u8 *)skb_push(skb, 1) = sk->sk_protocol;
 
     if (sk->sk_type == SOCK_SEQPACKET) {
         /* Connected mode sockets go via the LAPB machine */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index e1fda27cb27c..0446b892618a 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -114,7 +114,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
             return -ENOMEM;
         }
 
-        atomic_set(&ax25_rt->refcount, 1);
+        refcount_set(&ax25_rt->refcount, 1);
         ax25_rt->callsign     = route->dest_addr;
         ax25_rt->dev          = ax25_dev->dev;
         ax25_rt->digipeat     = NULL;
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 0403b0def7e6..83b035f56202 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -107,7 +107,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
         if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL)
             return -ENOMEM;
 
-        atomic_set(&ax25_uid->refcount, 1);
+        refcount_set(&ax25_uid->refcount, 1);
         ax25_uid->uid  = sax25_kuid;
         ax25_uid->call = sax->sax25_call;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 495ba7cdcb04..a3501173e200 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -732,8 +732,8 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
     unsigned char *skb_buff;
     unsigned long new_direct_link_flag;
 
-    skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
-    memcpy(skb_buff, packet_buff, packet_len);
+    skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
+                            packet_len);
     forw_packet_aggr->packet_len += packet_len;
     forw_packet_aggr->num_packets++;
 
@@ -1022,7 +1022,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
     u8 tq_avg;
 
     batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-               "update_originator(): Searching and updating originator entry of received packet\n");
+               "%s(): Searching and updating originator entry of received packet\n",
+               __func__);
 
     rcu_read_lock();
     hlist_for_each_entry_rcu(tmp_neigh_node,
@@ -1944,7 +1945,7 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
             batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing, seq);
-            seq_puts(seq, "\n");
+            seq_putc(seq, '\n');
             batman_count++;
 
 next:
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index a36c8e7291d6..4e2724c5b33d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -400,7 +400,7 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv,
                        neigh_node->if_incoming->net_dev->name);
 
             batadv_v_orig_print_neigh(orig_node, if_outgoing, seq);
-            seq_puts(seq, "\n");
+            seq_putc(seq, '\n');
             batman_count++;
 
 next:
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index b90c9903e246..bd1064d98e16 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -19,6 +19,7 @@
 #include "main.h"
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
@@ -29,6 +30,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/nl80211.h>
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -109,8 +111,12 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
              */
             return 0;
         }
-        if (!ret)
-            return sinfo.expected_throughput / 100;
+        if (ret)
+            goto default_throughput;
+        if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)))
+            goto default_throughput;
+
+        return sinfo.expected_throughput / 100;
     }
 
     /* if not a wifi interface, check if this device provides data via
@@ -340,9 +346,8 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
         goto out;
 
     skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
-    elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+    elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
     elp_packet = (struct batadv_elp_packet *)elp_buff;
-    memset(elp_packet, 0, BATADV_ELP_HLEN);
 
     elp_packet->packet_type = BATADV_ELP;
     elp_packet->version = BATADV_COMPAT_VERSION;
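The bat_v_elp.c hunk is a behavior fix hiding among the cleanups: cfg80211 only guarantees a station_info field is valid when the matching bit is set in sinfo.filled, so the old code could consume a never-filled expected_throughput. The guard pattern it adopts:

    /* trust the field only if the driver declared it as filled;
     * expected_throughput is in kbit/s, batman-adv counts in 100 kbit/s
     */
    if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)))
            goto default_throughput;
    return sinfo.expected_throughput / 100;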
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 03a35c9f456d..1e3dc374bfde 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -166,8 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
         goto reschedule;
 
     skb_reserve(skb, ETH_HLEN);
-    pkt_buff = skb_put(skb, ogm_buff_len);
-    memcpy(pkt_buff, ogm_buff, ogm_buff_len);
+    pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
 
     ogm_packet = (struct batadv_ogm2_packet *)skb->data;
     ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -382,8 +381,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
         goto out;
 
     skb_reserve(skb, ETH_HLEN);
-    skb_buff = skb_put(skb, packet_len);
-    memcpy(skb_buff, ogm_received, packet_len);
+    skb_buff = skb_put_data(skb, ogm_received, packet_len);
 
     /* apply forward penalty */
     ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
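Both bat_v_ogm.c hunks use skb_put_data(), the companion helper that fuses skb_put() with the memcpy() that almost always follows it:

    /* before */
    skb_buff = skb_put(skb, packet_len);
    memcpy(skb_buff, ogm_received, packet_len);

    /* after: one call, same tailroom rules as skb_put() */
    skb_buff = skb_put_data(skb, ogm_received, packet_len);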
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index d07e89ec8467..cdd8e8e4df0b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -394,7 +394,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
          */
         ether_addr_copy(ethhdr->h_source, mac);
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_send_claim(): CLAIM %pM on vid %d\n", mac,
+                   "%s(): CLAIM %pM on vid %d\n", __func__, mac,
                    batadv_print_vid(vid));
         break;
     case BATADV_CLAIM_TYPE_UNCLAIM:
@@ -403,7 +403,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
          */
         ether_addr_copy(hw_src, mac);
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
+                   "%s(): UNCLAIM %pM on vid %d\n", __func__, mac,
                    batadv_print_vid(vid));
         break;
     case BATADV_CLAIM_TYPE_ANNOUNCE:
@@ -412,7 +412,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
          */
         ether_addr_copy(hw_src, mac);
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
+                   "%s(): ANNOUNCE of %pM on vid %d\n", __func__,
                    ethhdr->h_source, batadv_print_vid(vid));
         break;
     case BATADV_CLAIM_TYPE_REQUEST:
@@ -423,15 +423,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
         ether_addr_copy(hw_src, mac);
         ether_addr_copy(ethhdr->h_dest, mac);
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
+                   "%s(): REQUEST of %pM to %pM on vid %d\n", __func__,
                    ethhdr->h_source, ethhdr->h_dest,
                    batadv_print_vid(vid));
         break;
     case BATADV_CLAIM_TYPE_LOOPDETECT:
         ether_addr_copy(ethhdr->h_source, mac);
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
-                   ethhdr->h_source, ethhdr->h_dest,
+                   "%s(): LOOPDETECT of %pM to %pM on vid %d\n",
+                   __func__, ethhdr->h_source, ethhdr->h_dest,
                    batadv_print_vid(vid));
 
         break;
@@ -509,7 +509,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
         return entry;
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
+               "%s(): not found (%pM, %d), creating new entry\n", __func__,
                orig, batadv_print_vid(vid));
 
     entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
@@ -605,7 +605,8 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
     int i;
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "bla_answer_request(): received a claim request, send all of our own claims again\n");
+               "%s(): received a claim request, send all of our own claims again\n",
+               __func__);
 
     backbone_gw = batadv_backbone_hash_find(bat_priv,
                                             primary_if->net_dev->dev_addr,
@@ -718,8 +719,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
         kref_init(&claim->refcount);
 
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
-                   mac, batadv_print_vid(vid));
+                   "%s(): adding new entry %pM, vid %d to hash ...\n",
+                   __func__, mac, batadv_print_vid(vid));
 
         kref_get(&claim->refcount);
         hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
@@ -739,8 +740,9 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
             goto claim_free_ref;
 
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n",
-                   mac, batadv_print_vid(vid), backbone_gw->orig);
+                   "%s(): changing ownership for %pM, vid %d to gw %pM\n",
+                   __func__, mac, batadv_print_vid(vid),
+                   backbone_gw->orig);
 
         remove_crc = true;
     }
@@ -808,7 +810,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
     if (!claim)
         return;
 
-    batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
+    batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__,
                mac, batadv_print_vid(vid));
 
     batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
@@ -848,8 +850,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
     crc = ntohs(*((__be16 *)(&an_addr[4])));
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
-               batadv_print_vid(vid), backbone_gw->orig, crc);
+               "%s(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
+               __func__, batadv_print_vid(vid), backbone_gw->orig, crc);
 
     spin_lock_bh(&backbone_gw->crc_lock);
     backbone_crc = backbone_gw->crc;
@@ -857,8 +859,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
 
     if (backbone_crc != crc) {
         batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
-                   "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
-                   backbone_gw->orig,
+                   "%s(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
+                   __func__, backbone_gw->orig,
                    batadv_print_vid(backbone_gw->vid),
                    backbone_crc, crc);
 
@@ -903,8 +905,8 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
         return true;
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "handle_request(): REQUEST vid %d (sent by %pM)...\n",
-               batadv_print_vid(vid), ethhdr->h_source);
+               "%s(): REQUEST vid %d (sent by %pM)...\n",
+               __func__, batadv_print_vid(vid), ethhdr->h_source);
 
     batadv_bla_answer_request(bat_priv, primary_if, vid);
     return true;
@@ -940,7 +942,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
     /* this must be an UNCLAIM frame */
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
+               "%s(): UNCLAIM %pM on vid %d (sent by %pM)...\n", __func__,
                claim_addr, batadv_print_vid(vid), backbone_gw->orig);
 
     batadv_bla_del_claim(bat_priv, claim_addr, vid);
@@ -1160,9 +1162,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
                                      ethhdr);
     if (ret == 1)
         batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                   "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
-                   ethhdr->h_source, batadv_print_vid(vid), hw_src,
-                   hw_dst);
+                   "%s(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
+                   __func__, ethhdr->h_source, batadv_print_vid(vid),
+                   hw_src, hw_dst);
 
     if (ret < 2)
         return !!ret;
@@ -1196,8 +1198,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
     }
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
-               ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst);
+               "%s(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
+               __func__, ethhdr->h_source, batadv_print_vid(vid), hw_src,
+               hw_dst);
 
     return true;
 }
@@ -1237,8 +1240,8 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
                 continue;
 
             batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
-                       "bla_purge_backbone_gw(): backbone gw %pM timed out\n",
-                       backbone_gw->orig);
+                       "%s(): backbone gw %pM timed out\n",
+                       __func__, backbone_gw->orig);
 
 purge_now:
             /* don't wait for the pending request anymore */
@@ -1295,11 +1298,11 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
                 goto skip;
 
             batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                       "bla_purge_claims(): timed out.\n");
+                       "%s(): timed out.\n", __func__);
 
 purge_now:
             batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                       "bla_purge_claims(): %pM, vid %d\n",
+                       "%s(): %pM, vid %d\n", __func__,
                        claim->addr, claim->vid);
 
             batadv_handle_unclaim(bat_priv, primary_if,
@@ -1851,8 +1854,8 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
      */
 
     batadv_dbg(BATADV_DBG_BLA, bat_priv,
-               "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
-               ethhdr->h_source,
+               "%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
+               __func__, ethhdr->h_source,
                batadv_is_my_client(bat_priv, ethhdr->h_source, vid) ?
                "yes" : "no");
 
@@ -1978,15 +1981,15 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
              * older than 100 ms to make sure we really
              * have a roaming client here.
              */
-            batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n",
-                       ethhdr->h_source);
+            batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Roaming client %pM detected. Unclaim it.\n",
+                       __func__, ethhdr->h_source);
             batadv_handle_unclaim(bat_priv, primary_if,
                                   primary_if->net_dev->dev_addr,
                                   ethhdr->h_source, vid);
             goto allow;
         } else {
-            batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n",
-                       ethhdr->h_source);
+            batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Race for claim %pM detected. Drop packet.\n",
+                       __func__, ethhdr->h_source);
             goto handled;
         }
     }
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 000ca2f113ab..6930d6b50f99 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -601,7 +601,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
                              BATADV_DAT_ADDR_MAX);
 
     batadv_dbg(BATADV_DBG_DAT, bat_priv,
-               "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst,
+               "%s(): IP=%pI4 hash(IP)=%u\n", __func__, &ip_dst,
                ip_key);
 
     for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 8f964beaac28..a98cf1104a30 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -296,8 +296,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
     /* Copy the payload of the each fragment into the last skb */
     hlist_for_each_entry(entry, chain, list) {
         size = entry->skb->len - hdr_size;
-        memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size,
-               size);
+        skb_put_data(skb_out, entry->skb->data + hdr_size, size);
     }
 
 free:
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 6308c9f0fd96..8ead292886d1 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -207,7 +207,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
     skb->priority = TC_PRIO_CONTROL;
     skb_reserve(skb, ETH_HLEN);
-    icmp_header = (struct batadv_icmp_header *)skb_put(skb, packet_len);
+    icmp_header = skb_put(skb, packet_len);
 
     if (copy_from_user(icmp_header, buff, packet_len)) {
         len = -EFAULT;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 810f7d026f54..2be8f1f46529 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.1"
+#define BATADV_SOURCE_VERSION "2017.2"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -168,7 +168,7 @@ enum batadv_uev_type {
 /* Maximum number of fragments for one packet */
 #define BATADV_FRAG_MAX_FRAGMENTS 16
 
 /* Maxumim size of each fragment */
-#define BATADV_FRAG_MAX_FRAG_SIZE 1400
+#define BATADV_FRAG_MAX_FRAG_SIZE 1280
 
 /* Time to keep fragments while waiting for rest of the fragments */
 #define BATADV_FRAG_TIMEOUT 10000
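The long run of batadv_dbg() edits above all make one substitution: function names hardcoded into format strings become "%s" plus __func__, so the log text can never drift out of sync with a renamed function. The pattern:

    /* before: the literal repeats (and can contradict) the function name */
    batadv_dbg(BATADV_DBG_BLA, bat_priv,
               "bla_del_claim(): %pM, vid %d\n", mac, vid);

    /* after: the compiler supplies the current name */
    batadv_dbg(BATADV_DBG_BLA, bat_priv,
               "%s(): %pM, vid %d\n", __func__, mac, vid);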
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e1f6fc72fe3e..3604d7899e2c 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1935,9 +1935,7 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
                              list)
                 seq_printf(seq, "%pM ", nc_node->addr);
-            seq_puts(seq, "\n");
-
-            seq_puts(seq, " Outgoing: ");
+            seq_puts(seq, "\n Outgoing: ");
             /* For out_nc_node to this orig_node */
             list_for_each_entry_rcu(nc_node,
                                     &orig_node->out_coding_list,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index ae9f4d37d34f..f10e3ff26f9d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -985,8 +985,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
         batadv_orig_node_put(orig_node_gw);
         if (is_gw) {
             batadv_dbg(BATADV_DBG_BLA, bat_priv,
-                       "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
-                       orig_addr_gw);
+                       "%s(): Dropped unicast pkt received from another backbone gw %pM.\n",
+                       __func__, orig_addr_gw);
             goto free_skb;
         }
     }
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 403df596a73d..d239a9d72ac3 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -971,11 +971,11 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
     if (hard_iface)
         batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-                   "purge_outstanding_packets(): %s\n",
-                   hard_iface->net_dev->name);
+                   "%s(): %s\n",
+                   __func__, hard_iface->net_dev->name);
     else
         batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-                   "purge_outstanding_packets()\n");
+                   "%s()\n", __func__);
 
     /* claim bcast list for free() */
     spin_lock_bh(&bat_priv->forw_bcast_list_lock);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 556f9a865ddf..bfe8effe9238 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -27,6 +27,7 @@
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
+#include <linux/init.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
@@ -594,7 +595,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
         return BATADV_TP_REASON_MEMORY_ERROR;
 
     skb_reserve(skb, ETH_HLEN);
-    icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+    icmp = skb_put(skb, sizeof(*icmp));
 
     /* fill the icmp header */
     ether_addr_copy(icmp->dst, orig_node->orig);
@@ -611,7 +612,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
     icmp->timestamp = htonl(timestamp);
 
     data_len = len - sizeof(*icmp);
-    data = (u8 *)skb_put(skb, data_len);
+    data = skb_put(skb, data_len);
     batadv_tp_fill_prerandom(tp_vars, data, data_len);
 
     r = batadv_send_skb_to_orig(skb, orig_node, NULL);
@@ -1189,7 +1190,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
     }
 
     skb_reserve(skb, ETH_HLEN);
-    icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+    icmp = skb_put(skb, sizeof(*icmp));
     icmp->packet_type = BATADV_ICMP;
     icmp->version = BATADV_COMPAT_VERSION;
     icmp->ttl = BATADV_TTL;
@@ -1497,7 +1498,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
 /**
  * batadv_tp_meter_init - initialize global tp_meter structures
  */
-void batadv_tp_meter_init(void)
+void __init batadv_tp_meter_init(void)
 {
     get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
 }
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e75b4937b497..e1133bc634b5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2488,18 +2488,16 @@ static bool
 _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
                        struct batadv_tt_global_entry *tt_global_entry)
 {
-    bool ret = false;
-
     if (tt_local_entry->common.flags & BATADV_TT_CLIENT_WIFI &&
         tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI)
-        ret = true;
+        return true;
 
     /* check if the two clients are marked as isolated */
     if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA &&
         tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA)
-        ret = true;
+        return true;
 
-    return ret;
+    return false;
 }
 
 /**
@@ -4010,19 +4008,22 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
                                           const unsigned char *addr,
                                           unsigned short vid)
 {
-    bool ret = false;
+    /* ignore loop detect macs, they are not supposed to be in the tt local
+     * data as well.
+     */
+    if (batadv_bla_is_loopdetect_mac(addr))
+        return false;
 
     if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid,
                               BATADV_TT_CLIENT_TEMP,
                               atomic_read(&orig_node->last_ttvn)))
-        goto out;
+        return false;
 
     batadv_dbg(BATADV_DBG_TT, bat_priv,
                "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
                addr, batadv_print_vid(vid), orig_node->orig);
-    ret = true;
-out:
-    return ret;
+
+    return true;
 }
 
 /**
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index f0095fd79818..aad994edd3bb 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -239,7 +239,7 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
         }
 
         len -= sizeof(*cl);
-        cl = (void *) skb_pull(skb, sizeof(*cl));
+        cl = skb_pull(skb, sizeof(*cl));
     }
 
     /* Fall back to L2CAP init sequence */
@@ -279,7 +279,7 @@ static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
     while (skb->len >= sizeof(*cl)) {
         BT_DBG("Controller id %d type %d status %d", cl->id, cl->type,
                cl->status);
-        cl = (struct a2mp_cl *) skb_pull(skb, sizeof(*cl));
+        cl = skb_pull(skb, sizeof(*cl));
     }
 
     /* TODO send A2MP_CHANGE_RSP */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 42d0997e2fbb..91e3ba280706 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
 
         seq_printf(seq,
                    "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu",
                    sk,
-                   atomic_read(&sk->sk_refcnt),
+                   refcount_read(&sk->sk_refcnt),
                    sk_rmem_alloc_get(sk),
                    sk_wmem_alloc_get(sk),
                    from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
@@ -733,7 +733,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
 EXPORT_SYMBOL(bt_procfs_init);
 EXPORT_SYMBOL(bt_procfs_cleanup);
 
-static struct net_proto_family bt_sock_family_ops = {
+static const struct net_proto_family bt_sock_family_ops = {
     .owner  = THIS_MODULE,
     .family = PF_BLUETOOTH,
     .create = bt_sock_create,
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5c4808b3da2d..7b3965861013 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -374,25 +374,22 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
     /* Decompress header and construct ether frame */
     switch (type & BNEP_TYPE_MASK) {
     case BNEP_COMPRESSED:
-        memcpy(__skb_put(nskb, ETH_HLEN), &s->eh, ETH_HLEN);
+        __skb_put_data(nskb, &s->eh, ETH_HLEN);
         break;
 
     case BNEP_COMPRESSED_SRC_ONLY:
-        memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
-        memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
+        __skb_put_data(nskb, s->eh.h_dest, ETH_ALEN);
+        __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
         put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
         break;
 
     case BNEP_COMPRESSED_DST_ONLY:
-        memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
-               ETH_ALEN);
-        memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
-               ETH_ALEN + 2);
+        __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
+        __skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2);
         break;
 
     case BNEP_GENERAL:
-        memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
-               ETH_ALEN * 2);
+        __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN * 2);
         put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
         break;
     }
@@ -484,16 +481,16 @@ static int bnep_session(void *arg)
     struct net_device *dev = s->dev;
     struct sock *sk = s->sock->sk;
     struct sk_buff *skb;
-    wait_queue_entry_t wait;
+    DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
     BT_DBG("");
 
     set_user_nice(current, -15);
 
-    init_waitqueue_entry(&wait, current);
     add_wait_queue(sk_sleep(sk), &wait);
     while (1) {
-        set_current_state(TASK_INTERRUPTIBLE);
+        /* Ensure session->terminate is updated */
+        smp_mb__before_atomic();
 
         if (atomic_read(&s->terminate))
             break;
@@ -515,9 +512,8 @@ static int bnep_session(void *arg)
 
         netif_wake_queue(dev);
 
-        schedule();
+        wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
     }
-    __set_current_state(TASK_RUNNING);
     remove_wait_queue(sk_sleep(sk), &wait);
 
     /* Cleanup session */
@@ -666,7 +662,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
     s = __bnep_get_session(req->dst);
     if (s) {
         atomic_inc(&s->terminate);
-        wake_up_process(s->task);
+        wake_up_interruptible(sk_sleep(s->sock->sk));
     } else
         err = -ENOENT;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 2b875edf77e1..1d4d7d415730 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -75,16 +75,16 @@ static void bnep_net_set_mc_list(struct net_device *dev)
         u8 start[ETH_ALEN] = { 0x01 };
 
         /* Request all addresses */
-        memcpy(__skb_put(skb, ETH_ALEN), start, ETH_ALEN);
-        memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+        __skb_put_data(skb, start, ETH_ALEN);
+        __skb_put_data(skb, dev->broadcast, ETH_ALEN);
         r->len = htons(ETH_ALEN * 2);
     } else {
         struct netdev_hw_addr *ha;
         int i, len = skb->len;
 
         if (dev->flags & IFF_BROADCAST) {
-            memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
-            memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+            __skb_put_data(skb, dev->broadcast, ETH_ALEN);
+            __skb_put_data(skb, dev->broadcast, ETH_ALEN);
         }
 
         /* FIXME: We should group addresses here. */
 
@@ -93,8 +93,8 @@ static void bnep_net_set_mc_list(struct net_device *dev)
         netdev_for_each_mc_addr(ha, dev) {
             if (i == BNEP_MAX_MULTICAST_FILTERS)
                 break;
-            memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
-            memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+            __skb_put_data(skb, ha->addr, ETH_ALEN);
+            __skb_put_data(skb, ha->addr, ETH_ALEN);
 
             i++;
         }
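The bnep_session() rewrite above (and the identical one in cmtp below) replaces the open-coded set_current_state()/schedule() loop with DEFINE_WAIT_FUNC(wait, woken_wake_function) plus wait_woken(). The woken_wake_function protocol records a wakeup that lands between the condition check and the sleep in the wait entry itself, so wait_woken() returns immediately instead of sleeping through it. Skeleton of the pattern:

    DEFINE_WAIT_FUNC(wait, woken_wake_function);

    add_wait_queue(sk_sleep(sk), &wait);
    while (1) {
            if (atomic_read(&s->terminate))
                    break;
            /* ... drain queues, transmit, etc. ... */
            wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
    }
    remove_wait_queue(sk_sleep(sk), &wait);

Note that the waker side changes to match: wake_up_interruptible() on the socket's wait queue instead of wake_up_process() on a stored task pointer.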
*/ @@ -93,8 +93,8 @@ static void bnep_net_set_mc_list(struct net_device *dev) netdev_for_each_mc_addr(ha, dev) { if (i == BNEP_MAX_MULTICAST_FILTERS) break; - memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); - memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); + __skb_put_data(skb, ha->addr, ETH_ALEN); + __skb_put_data(skb, ha->addr, ETH_ALEN); i++; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 14f7c8135c31..7f26a5a19ff6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -122,7 +122,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const if (skb && (skb->len > 0)) skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len); - memcpy(skb_put(nskb, count), buf, count); + skb_put_data(nskb, buf, count); session->reassembly[id] = nskb; @@ -280,16 +280,16 @@ static int cmtp_session(void *arg) struct cmtp_session *session = arg; struct sock *sk = session->sock->sk; struct sk_buff *skb; - wait_queue_entry_t wait; + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG("session %p", session); set_user_nice(current, -15); - init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -306,9 +306,8 @@ static int cmtp_session(void *arg) cmtp_process_transmit(session); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); @@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) err = cmtp_attach_device(session); if (err < 0) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(sk_sleep(session->sock->sk)); up_write(&cmtp_session_sem); return err; } @@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) /* Stop session thread */ atomic_inc(&session->terminate); - wake_up_process(session->task); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); + + wake_up_interruptible(sk_sleep(session->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index 24d4e60f8c48..c7b1a9aee579 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -89,11 +89,9 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], p.curve_id = ECC_CURVE_NIST_P256; buf_len = crypto_ecdh_key_len(&p); buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) { - pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", - buf_len); + if (!buf) goto free_req; - } + crypto_ecdh_encode_key(buf, buf_len, &p); /* Set A private Key */ @@ -170,11 +168,8 @@ bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) p.key_size = 32; buf_len = crypto_ecdh_key_len(&p); buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) { - pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", - buf_len); + if (!buf) goto free_req; - } do { if (tries++ >= max_tries) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 05686776a5fb..6bc679cd3481 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -148,13 +148,13 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, return -EINVAL; /* When the diagnostic flags are not persistent and the transport - * is not active, then there is no need for 
the vendor callback. - * - * Instead just store the desired value. If needed the setting - * will be programmed when the controller gets powered on. + * is not active or in user channel operation, then there is no need + * for the vendor callback. Instead just store the desired value and + * the setting will be programmed when the controller gets powered on. */ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && - !test_bit(HCI_RUNNING, &hdev->flags)) + (!test_bit(HCI_RUNNING, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) goto done; hci_req_sync_lock(hdev); @@ -548,6 +548,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + bool changed = false; /* If Connectionless Slave Broadcast master role is supported * enable all necessary events for it. @@ -557,6 +558,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ events[2] |= 0x20; /* CSB Channel Map Change */ + changed = true; } /* If Connectionless Slave Broadcast slave role is supported @@ -567,13 +569,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ events[2] |= 0x08; /* Truncated Page Complete */ + changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ - if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; + changed = true; + } - hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); + /* Some Broadcom based controllers indicate support for Set Event + * Mask Page 2 command, but then actually do not support it. Since + * the default value is all bits set to zero, the command is only + * required if the event mask has to be changed. In case no change + * to the event mask is needed, skip this command. + */ + if (changed) + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, + sizeof(events), events); } static int hci_init3_req(struct hci_request *req, unsigned long opt) @@ -635,6 +648,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) * Report */ + /* If the controller supports Channel Selection Algorithm #2 + * feature, enable the corresponding event. + */ + if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2) + events[2] |= 0x08; /* LE Channel Selection + * Algorithm + */ + /* If the controller supports the LE Set Scan Enable command, * enable the corresponding advertising report event. */ @@ -677,6 +698,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[34] & 0x04) events[1] |= 0x01; /* LE Generate DHKey Complete */ + /* If the controller supports the LE Set Default PHY or + * LE Set PHY commands, enable the corresponding event. 
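/*
 * Sidebar: the hci_set_event_mask_page_2() hunk above turns an
 * unconditional command into send-only-on-change, because the all-zero
 * mask is already the controller default and some controllers reject
 * the command they claim to support. The guard, reduced to the one
 * feature test quoted in the hunk (a sketch, not the full function):
 */
#include <net/bluetooth/hci_core.h>

static void example_set_event_mask_page_2(struct hci_request *req)
{
        struct hci_dev *hdev = req->hdev;
        u8 events[8] = { 0 };
        bool changed = false;

        if (lmp_ping_capable(hdev) || (hdev->le_features[0] & HCI_LE_PING)) {
                events[2] |= 0x80; /* Authenticated Payload Timeout Expired */
                changed = true;
        }

        if (changed)
                hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2,
                            sizeof(events), events);
}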
+ */ + if (hdev->commands[35] & (0x20 | 0x40)) + events[1] |= 0x08; /* LE PHY Update Complete */ + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), events); @@ -771,6 +798,27 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) sizeof(support), &support); } + /* Set Suggested Default Data Length to maximum if supported */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + struct hci_cp_le_write_def_data_len cp; + + cp.tx_len = hdev->le_max_tx_len; + cp.tx_time = hdev->le_max_tx_time; + hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); + } + + /* Set Default PHY parameters if command is supported */ + if (hdev->commands[35] & 0x20) { + struct hci_cp_le_set_default_phy cp; + + /* No transmitter PHY or receiver PHY preferences */ + cp.all_phys = 0x03; + cp.tx_phys = 0; + cp.rx_phys = 0; + + hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp); + } + return 0; } @@ -1384,6 +1432,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) * completed. */ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); @@ -3047,15 +3096,14 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1, hdev->name); + hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->workqueue) { error = -ENOMEM; goto err; } - hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1, hdev->name); + hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, + hdev->name); if (!hdev->req_workqueue) { destroy_workqueue(hdev->workqueue); error = -ENOMEM; @@ -3217,7 +3265,7 @@ int hci_reset_dev(struct hci_dev *hdev) return -ENOMEM; hci_skb_pkt_type(skb) = HCI_EVENT_PKT; - memcpy(skb_put(skb, 3), hw_err, 3); + skb_put_data(skb, hw_err, 3); /* Send Hardware Error to upper stack */ return hci_recv_frame(hdev, skb); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b5faff458d8b..b73ac149de34 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -299,12 +299,12 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, if (!skb) return NULL; - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE); hdr->opcode = cpu_to_le16(opcode); hdr->plen = plen; if (plen) - memcpy(skb_put(skb, plen), param, plen); + skb_put_data(skb, param, plen); BT_DBG("skb len %d", skb->len); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 638bf0e1a2e3..65d734c165bd 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -332,7 +332,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; /* Put header before the data */ - hdr = (void *)skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); @@ -379,11 +379,11 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, put_unaligned_le16(event, skb_put(skb, 2)); if (data) - memcpy(skb_put(skb, data_len), data, data_len); + skb_put_data(skb, data, data_len); skb->tstamp = tstamp; - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = 
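/*
 * Sidebar: alloc_ordered_workqueue() above is the idiomatic spelling of
 * alloc_workqueue() with WQ_UNBOUND and max_active == 1: at most one
 * work item runs at a time, in queueing order. A minimal usage sketch;
 * the "example" name is illustrative:
 */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_init(void)
{
        example_wq = alloc_ordered_workqueue("%s", WQ_HIGHPRI, "example");
        return example_wq ? 0 : -ENOMEM;
}

static void example_exit(void)
{
        destroy_workqueue(example_wq);
}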
cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); @@ -410,7 +410,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) if (!skb) return NULL; - ni = (void *)skb_put(skb, HCI_MON_NEW_INDEX_SIZE); + ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); @@ -438,7 +438,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) if (!skb) return NULL; - ii = (void *)skb_put(skb, HCI_MON_INDEX_INFO_SIZE); + ii = skb_put(skb, HCI_MON_INDEX_INFO_SIZE); bacpy(&ii->bdaddr, &hdev->bdaddr); ii->manufacturer = cpu_to_le16(hdev->manufacturer); @@ -467,7 +467,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) __net_timestamp(skb); - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); @@ -515,14 +515,14 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(format, skb_put(skb, 2)); - memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + skb_put_data(skb, ver, sizeof(ver)); put_unaligned_le32(flags, skb_put(skb, 4)); - *skb_put(skb, 1) = TASK_COMM_LEN; - memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + skb_put_u8(skb, TASK_COMM_LEN); + skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN); __net_timestamp(skb); - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); @@ -560,7 +560,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) __net_timestamp(skb); - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); @@ -586,11 +586,11 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) - memcpy(skb_put(skb, len), buf, len); + skb_put_data(skb, buf, len); __net_timestamp(skb); - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); @@ -616,7 +616,7 @@ send_monitor_note(struct sock *sk, const char *fmt, ...) 
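/*
 * Sidebar: the cast removals above rely on skb_put()/skb_push() now
 * returning void * instead of unsigned char *. A sketch with a
 * hypothetical header struct:
 */
#include <linux/skbuff.h>

struct example_hdr {                    /* hypothetical */
        __le16 opcode;
        __le16 len;
};

static void example_fill(struct sk_buff *skb)
{
        struct example_hdr *hdr;

        /* was: hdr = (struct example_hdr *)skb_put(skb, sizeof(*hdr)); */
        hdr = skb_put(skb, sizeof(*hdr));
        hdr->opcode = cpu_to_le16(0x0001);
        hdr->len = cpu_to_le16(0);

        /* The flip side appears in the send_monitor_note() hunk nearby:
         * a void * result cannot be dereferenced, so writing one byte
         * through it now needs *(u8 *)skb_put(skb, 1) = 0; (or the new
         * skb_put_u8() helper used in hidp). */
}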
va_start(args, fmt); vsprintf(skb_put(skb, len), fmt, args); - *skb_put(skb, 1) = 0; + *(u8 *)skb_put(skb, 1) = 0; va_end(args); __net_timestamp(skb); @@ -703,11 +703,11 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) if (!skb) return; - hdr = (void *)skb_put(skb, HCI_EVENT_HDR_SIZE); + hdr = skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; - ev = (void *)skb_put(skb, sizeof(*ev) + dlen); + ev = skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index fc31161e98f2..002743ea509c 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -36,6 +36,7 @@ #define VERSION "1.2" static DECLARE_RWSEM(hidp_session_sem); +static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq); static LIST_HEAD(hidp_session_list); static unsigned char hidp_keycode[256] = { @@ -112,9 +113,9 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock, return -ENOMEM; } - *skb_put(skb, 1) = hdr; + skb_put_u8(skb, hdr); if (data && size > 0) - memcpy(skb_put(skb, size), data, size); + skb_put_data(skb, data, size); skb_queue_tail(transmit, skb); wake_up_interruptible(sk_sleep(sk)); @@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session) * Wake up session thread and notify it to stop. This is asynchronous and * returns immediately. Call this whenever a runtime error occurs and you want * the session to stop. - * Note: wake_up_process() performs any necessary memory-barriers for us. + * Note: wake_up_interruptible() performs any necessary memory-barriers for us. */ static void hidp_session_terminate(struct hidp_session *session) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(&hidp_session_wq); } /* @@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; struct sk_buff *skb; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&hidp_session_wq, &wait); for (;;) { /* * This thread can be woken up two ways: @@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session) * session->terminate flag and wakes this thread up. * - Via modifying the socket state of ctrl/intr_sock. This * thread is woken up by ->sk_state_changed(). - * - * Note: set_current_state() performs any necessary - * memory-barriers for us. 
*/ - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session) hidp_process_transmit(session, &session->ctrl_transmit, session->ctrl_sock); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&hidp_session_wq, &wait); atomic_inc(&session->terminate); - set_current_state(TASK_RUNNING); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); +} + +static int hidp_session_wake_function(wait_queue_entry_t *wait, + unsigned int mode, + int sync, void *key) +{ + wake_up_interruptible(&hidp_session_wq); + return false; } /* @@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session) static int hidp_session_thread(void *arg) { struct hidp_session *session = arg; - wait_queue_entry_t ctrl_wait, intr_wait; + DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function); + DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function); BT_DBG("session %p", session); @@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg) set_user_nice(current, -15); hidp_set_timer(session); - init_waitqueue_entry(&ctrl_wait, current); - init_waitqueue_entry(&intr_wait, current); add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). See diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f88ac99528ce..303c779bfe38 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1048,7 +1048,7 @@ static struct sk_buff *l2cap_create_sframe_pdu(struct l2cap_chan *chan, if (!skb) return ERR_PTR(-ENOMEM); - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); @@ -2182,7 +2182,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, return skb; /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + L2CAP_PSMLEN_SIZE); put_unaligned(chan->psm, (__le16 *) skb_put(skb, L2CAP_PSMLEN_SIZE)); @@ -2213,7 +2213,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, return skb; /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len); @@ -2255,7 +2255,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, return skb; /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); @@ -2373,7 +2373,7 @@ static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, return skb; /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); @@ -2908,7 +2908,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, if (!skb) return NULL; - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE 
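/*
 * Sidebar: the l2cap/rfcomm/sco bind() and connect() fixes above check
 * the caller-supplied length before reading sa_family; offsetofend()
 * (offsetof plus the member's size) says "enough bytes to cover this
 * member". A sketch, not from this patch:
 */
#include <linux/kernel.h>
#include <linux/socket.h>

static int example_addr_check(const struct sockaddr *addr, int alen)
{
        /* Reject short buffers before addr->sa_family is dereferenced;
         * checking the family alone would read past a too-small buffer. */
        if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
            addr->sa_family != AF_BLUETOOTH)
                return -EINVAL;
        return 0;
}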
+ dlen); if (conn->hcon->type == LE_LINK) @@ -2916,14 +2916,14 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, else lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); - cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); + cmd = skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; cmd->ident = ident; cmd->len = cpu_to_le16(dlen); if (dlen) { count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; - memcpy(skb_put(skb, count), data, count); + skb_put_data(skb, data, count); data += count; } @@ -2938,7 +2938,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, if (!*frag) goto fail; - memcpy(skb_put(*frag, count), data, count); + skb_put_data(*frag, data, count); len -= count; data += count; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 507b80d59dec..67a8642f57ea 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -87,7 +87,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) BT_DBG("sk %p", sk); - if (!addr || addr->sa_family != AF_BLUETOOTH) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&la, 0, sizeof(la)); @@ -181,7 +182,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p", sk); - if (!addr || alen < sizeof(addr->sa_family) || + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index c933bd08c1fe..0d0a6d77b9e8 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -44,11 +44,11 @@ static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) - memcpy(skb_put(skb, len), buf, len); + skb_put_data(skb, buf, len); __net_timestamp(skb); - hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); @@ -66,7 +66,7 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, if (!skb) return -ENOMEM; - hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(event); if (hdev) hdr->index = cpu_to_le16(hdev->id); @@ -75,7 +75,7 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, hdr->len = cpu_to_le16(data_len); if (data) - memcpy(skb_put(skb, data_len), data, data_len); + skb_put_data(skb, data, data_len); /* Time stamp */ __net_timestamp(skb); @@ -103,13 +103,13 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) if (!skb) return -ENOMEM; - hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev)); - ev = (void *) skb_put(skb, sizeof(*ev)); + ev = skb_put(skb, sizeof(*ev)); ev->status = status; ev->opcode = cpu_to_le16(cmd); @@ -147,13 +147,13 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (!skb) return -ENOMEM; - hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); - ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); + ev = skb_put(skb, sizeof(*ev) + rp_len); 
ev->opcode = cpu_to_le16(cmd); ev->status = status; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8ebca9033d60..4a0b41d75c84 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -863,7 +863,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d) if (!skb) return -ENOMEM; - cmd = (void *) __skb_put(skb, sizeof(*cmd)); + cmd = __skb_put(skb, sizeof(*cmd)); cmd->addr = d->addr; cmd->ctrl = __ctrl(RFCOMM_DISC, 1); cmd->len = __len8(0); @@ -1149,10 +1149,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) u8 *crc; if (len > 127) { - hdr = (void *) skb_push(skb, 4); + hdr = skb_push(skb, 4); put_unaligned(cpu_to_le16(__len16(len)), (__le16 *) &hdr->len); } else { - hdr = (void *) skb_push(skb, 3); + hdr = skb_push(skb, 3); hdr->len = __len8(len); } hdr->addr = addr; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ac3c650cb234..1aaccf637479 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk) if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; - BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* Kill poor orphan */ bt_sock_unlink(&rfcomm_sk_list, sk); @@ -339,7 +339,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr struct sock *sk = sock->sk; int len, err = 0; - if (!addr || addr->sa_family != AF_BLUETOOTH) + if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&sa, 0, sizeof(sa)); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 2f2cb5e27cdd..5f3074cb6b4d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -798,7 +798,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - memcpy(skb_put(skb, size), buf + sent, size); + skb_put_data(skb, buf + sent, size); rfcomm_dlc_send_noerror(dlc, skb); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 728e0c8dc8e7..795e920a3281 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -524,10 +524,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - if (addr_len < sizeof(struct sockaddr_sco)) + if (!addr || addr_len < sizeof(struct sockaddr_sco) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 14585edc9439..a0ef89772c36 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/scatterlist.h> #include <linux/crypto.h> +#include <crypto/algapi.h> #include <crypto/b128ops.h> #include <crypto/hash.h> @@ -523,7 +524,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], if (err) return false; - return !memcmp(bdaddr->b, hash, 3); + return !crypto_memneq(bdaddr->b, hash, 3); } int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) @@ -579,7 +580,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. 
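/*
 * Sidebar: the smp.c conversions above swap memcmp() for crypto_memneq(),
 * whose running time does not depend on where the buffers first differ,
 * so pairing-confirm checks stop leaking a timing signal. It returns
 * nonzero on mismatch, which is why it drops into "if (memcmp(...))"
 * sites unchanged. A sketch, not from this patch:
 */
#include <crypto/algapi.h>
#include <linux/types.h>

static int example_check_confirm(const u8 expect[16], const u8 got[16])
{
        /* was: if (memcmp(expect, got, 16)) */
        if (crypto_memneq(expect, got, 16))
                return -EINVAL;         /* confirmation mismatch */
        return 0;
}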
*/ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } smp->debug_key = false; @@ -993,7 +994,7 @@ static u8 smp_random(struct smp_chan *smp) if (ret) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { + if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { BT_ERR("Pairing failed (confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1512,7 +1513,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) smp->rrnd, r, cfm)) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; smp->passkey_round++; @@ -1908,7 +1909,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (memcmp(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_sk, debug_sk, 32)) break; } } @@ -2176,7 +2177,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(smp->pcnf, cfm, 16)) + if (crypto_memneq(smp->pcnf, cfm, 16)) return SMP_CONFIRM_FAILED; } else { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), @@ -2660,7 +2661,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(cfm.confirm_val, smp->pcnf, 16)) + if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16)) return SMP_CONFIRM_FAILED; } @@ -2693,7 +2694,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) else hcon->pending_sec_level = BT_SECURITY_FIPS; - if (!memcmp(debug_pk, smp->remote_pk, 64)) + if (!crypto_memneq(debug_pk, smp->remote_pk, 64)) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (smp->method == DSP_PASSKEY) { @@ -2792,7 +2793,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (err) return SMP_UNSPECIFIED; - if (memcmp(check->e, e, 16)) + if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; if (!hcon->out) { @@ -3506,10 +3507,10 @@ static int __init test_debug_key(void) if (!generate_ecdh_keys(pk, sk)) return -EINVAL; - if (memcmp(sk, debug_sk, 32)) + if (crypto_memneq(sk, debug_sk, 32)) return -EINVAL; - if (memcmp(pk, debug_pk, 64)) + if (crypto_memneq(pk, debug_pk, 64)) return -EINVAL; return 0; @@ -3529,7 +3530,7 @@ static int __init test_ah(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 3)) + if (crypto_memneq(res, exp, 3)) return -EINVAL; return 0; @@ -3559,7 +3560,7 @@ static int __init test_c1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3584,7 +3585,7 @@ static int __init test_s1(struct crypto_cipher *tfm_aes) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3616,7 +3617,7 @@ static int __init test_f4(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3650,10 +3651,10 @@ static int __init test_f5(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(mackey, exp_mackey, 16)) + if (crypto_memneq(mackey, exp_mackey, 16)) return -EINVAL; - if (memcmp(ltk, exp_ltk, 16)) + if (crypto_memneq(ltk, exp_ltk, 16)) return -EINVAL; return 0; @@ -3686,7 +3687,7 @@ static int __init test_f6(struct crypto_shash *tfm_cmac) if (err) return err; - if 
(memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; @@ -3740,7 +3741,7 @@ static int __init test_h6(struct crypto_shash *tfm_cmac) if (err) return err; - if (memcmp(res, exp, 16)) + if (crypto_memneq(res, exp, 16)) return -EINVAL; return 0; diff --git a/net/bridge/br.c b/net/bridge/br.c index 889e5640455f..1407d1ba7577 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,7 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; -/* called with RTNL */ +/* called with RTNL or RCU */ static int br_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -131,27 +131,36 @@ static int br_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - p = br_port_get_rtnl(dev); + p = br_port_get_rtnl_rcu(dev); if (!p) goto out; br = p->br; switch (event) { - case SWITCHDEV_FDB_ADD: + case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid); - if (err) + if (err) { err = notifier_from_errno(err); + break; + } + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid); break; - case SWITCHDEV_FDB_DEL: + case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid); if (err) err = notifier_from_errno(err); break; + case SWITCHDEV_FDB_OFFLOADED: + fdb_info = ptr; + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid); + break; } out: diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ab0c7cc8448f..a5e4a736a984 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -511,6 +511,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->is_static = is_static; fdb->added_by_user = 0; fdb->added_by_external_learn = 0; + fdb->offloaded = 0; fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); } @@ -647,11 +648,16 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0; + ndm->ndm_flags = 0; ndm->ndm_type = 0; ndm->ndm_ifindex = fdb->dst ? 
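/*
 * Sidebar: the br.c hunk above renames the FDB notifier events so the
 * direction is explicit: *_TO_BRIDGE flows from a switchdev driver into
 * the bridge, which then marks the entry with SWITCHDEV_FDB_OFFLOADED.
 * A driver-side sketch, not from this patch; port_dev/addr/vid are the
 * caller's values:
 */
#include <net/switchdev.h>

static void example_notify_learned(struct net_device *port_dev,
                                   const unsigned char *addr, u16 vid)
{
        struct switchdev_notifier_fdb_info info = {
                .addr = addr,
                .vid  = vid,
        };

        /* br_switchdev_event() turns this into an external-learn FDB
         * entry and sets its offloaded bit. */
        call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, port_dev,
                                 &info.info);
}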
fdb->dst->dev->ifindex : br->dev->ifindex; ndm->ndm_state = fdb_to_nud(br, fdb); + if (fdb->offloaded) + ndm->ndm_flags |= NTF_OFFLOADED; + if (fdb->added_by_external_learn) + ndm->ndm_flags |= NTF_EXT_LEARNED; + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) @@ -690,6 +696,8 @@ static void fdb_notify(struct net_bridge *br, struct sk_buff *skb; int err = -ENOBUFS; + br_switchdev_fdb_notify(fdb, type); + skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; @@ -1071,11 +1079,11 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head; struct net_bridge_fdb_entry *fdb; + struct hlist_head *head; + bool modified = false; int err = 0; - ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1088,14 +1096,25 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, } fdb->added_by_external_learn = 1; fdb_notify(br, fdb, RTM_NEWNEIGH); - } else if (fdb->added_by_external_learn) { - /* Refresh entry */ - fdb->updated = fdb->used = jiffies; - } else if (!fdb->added_by_user) { - /* Take over SW learned entry */ - fdb->added_by_external_learn = 1; + } else { fdb->updated = jiffies; - fdb_notify(br, fdb, RTM_NEWNEIGH); + + if (fdb->dst != p) { + fdb->dst = p; + modified = true; + } + + if (fdb->added_by_external_learn) { + /* Refresh entry */ + fdb->used = jiffies; + } else if (!fdb->added_by_user) { + /* Take over SW learned entry */ + fdb->added_by_external_learn = 1; + modified = true; + } + + if (modified) + fdb_notify(br, fdb, RTM_NEWNEIGH); } err_unlock: @@ -1110,7 +1129,6 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, struct net_bridge_fdb_entry *fdb; int err = 0; - ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); fdb = br_fdb_find(br, addr, vid); @@ -1123,3 +1141,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, return err; } + +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_fdb_entry *fdb; + + spin_lock_bh(&br->hash_lock); + + fdb = br_fdb_find(br, addr, vid); + if (fdb) + fdb->offloaded = 1; + + spin_unlock_bh(&br->hash_lock); +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 7f8d05cf9065..f3aef22931ab 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -138,7 +138,7 @@ void br_manage_promisc(struct net_bridge *br) /* If vlan filtering is disabled or bridge interface is placed * into promiscuous mode, place all ports in promiscuous mode. 
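/*
 * Sidebar: br_vlan_enabled() above stops being a bridge-private inline
 * on struct net_bridge and becomes an exported helper keyed on the
 * net_device, so drivers can ask too. Caller's view, as a sketch;
 * br_dev must really be a bridge, since the helper reads netdev_priv():
 */
#include <linux/if_bridge.h>

static bool example_vlan_filtering(const struct net_device *br_dev)
{
        return br_vlan_enabled(br_dev);
}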
*/ - if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br)) + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev)) set_all = true; list_for_each_entry(p, &br->port_list, list) { diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b0845480a3ae..09dcdb9c0f3c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -599,7 +599,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; vg = nbp_vlan_group(p); - if (br_vlan_enabled(br) && vg && entry->vid == 0) { + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; err = __br_mdb_add(net, br, entry); @@ -694,7 +694,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; vg = nbp_vlan_group(p); - if (br_vlan_enabled(br) && vg && entry->vid == 0) { + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; err = __br_mdb_del(br, entry); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index faa7261a992f..8dc5c8d69bcd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2176,6 +2176,14 @@ unlock: return err; } +bool br_multicast_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return !br->multicast_disabled; +} +EXPORT_SYMBOL_GPL(br_multicast_enabled); + int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { unsigned long max_delay; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 067cf0313449..2261e5194c82 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; - if (atomic_read(&nf_bridge->use) > 1) { + if (refcount_read(&nf_bridge->use) > 1) { struct nf_bridge_info *tmp = nf_bridge_alloc(skb); if (tmp) { memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); - atomic_set(&tmp->use, 1); + refcount_set(&tmp->use, 1); } nf_bridge_put(nf_bridge); nf_bridge = tmp; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 32bd3ead9ba1..3bc890716c89 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -662,16 +662,26 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) } /* Set/clear or port flags based on attribute */ -static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], - int attrtype, unsigned long mask) +static int br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) { - if (tb[attrtype]) { - u8 flag = nla_get_u8(tb[attrtype]); - if (flag) - p->flags |= mask; - else - p->flags &= ~mask; - } + unsigned long flags; + int err; + + if (!tb[attrtype]) + return 0; + + if (nla_get_u8(tb[attrtype])) + flags = p->flags | mask; + else + flags = p->flags & ~mask; + + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + + p->flags = flags; + return 0; } /* Process bridge protocol info on port */ @@ -681,20 +691,55 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) bool br_vlan_tunnel_old = false; int err; - br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); - br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); - br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); - br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); - 
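/*
 * Sidebar: the atomic_t to refcount_t moves scattered through this diff
 * (nf_bridge->use here, skb->users, sk_wmem_alloc and the vlan refcnt
 * later) swap a plain counter for one that saturates and WARNs on
 * overflow or underflow instead of wrapping into a use-after-free. The
 * API maps one-to-one; a sketch with a hypothetical object:
 */
#include <linux/refcount.h>
#include <linux/slab.h>

struct example_obj {                    /* hypothetical */
        refcount_t use;
};

static void example_get(struct example_obj *obj)
{
        refcount_inc(&obj->use);        /* was: atomic_inc() */
}

static void example_put(struct example_obj *obj)
{
        /* was: if (atomic_dec_and_test(&obj->use)) */
        if (refcount_dec_and_test(&obj->use))
                kfree(obj);
}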
br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); - br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); - br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); - br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); - br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + if (err) + return err; br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false; - br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + err = br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + if (err) + return err; + if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL)) nbp_vlan_tunnel_info_flush(p); @@ -813,7 +858,9 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) return err; } -static int br_validate(struct nlattr *tb[], struct nlattr *data[]) + +static int br_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) @@ -850,7 +897,8 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(brdev); int ret; @@ -915,7 +963,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(brdev); int err; @@ -1168,7 +1217,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], } static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); int err; @@ -1183,7 +1233,7 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, if (err) return err; - err = br_changelink(dev, tb, data); + err = br_changelink(dev, tb, data, extack); if (err) unregister_netdevice(dev); return err; @@ -1251,7 +1301,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device 
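/*
 * Sidebar: the ->validate()/->changelink()/->newlink() signature churn
 * above (and again in chnl_net.c below) threads struct netlink_ext_ack
 * through rtnl_link_ops so a failed link operation can return a
 * human-readable reason. A sketch of a validate hook using it; the
 * message text is illustrative:
 */
#include <linux/if_ether.h>
#include <net/rtnetlink.h>

static int example_validate(struct nlattr *tb[], struct nlattr *data[],
                            struct netlink_ext_ack *extack)
{
        if (tb[IFLA_ADDRESS] && nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
                NL_SET_ERR_MSG(extack, "invalid link address length");
                return -EINVAL;
        }
        return 0;
}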
*brdev) u32 ageing_time = jiffies_to_clock_t(br->ageing_time); u32 stp_enabled = br->stp_enabled; u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; - u8 vlan_enabled = br_vlan_enabled(br); + u8 vlan_enabled = br_vlan_enabled(br->dev); u64 clockval; clockval = br_timer_value(&br->hello_timer); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0d177280aa84..fd9ee73e0a6d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -21,6 +21,7 @@ #include <net/ip6_fib.h> #include <linux/if_vlan.h> #include <linux/rhashtable.h> +#include <linux/refcount.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -127,7 +128,7 @@ struct net_bridge_vlan { struct net_bridge_port *port; }; union { - atomic_t refcnt; + refcount_t refcnt; struct net_bridge_vlan *brvlan; }; @@ -169,7 +170,8 @@ struct net_bridge_fdb_entry { unsigned char is_local:1, is_static:1, added_by_user:1, - added_by_external_learn:1; + added_by_external_learn:1, + offloaded:1; /* write-heavy members should not affect lookups */ unsigned long updated ____cacheline_aligned_in_smp; @@ -284,6 +286,12 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } +static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev) +{ + return br_port_exists(dev) ? + rcu_dereference_rtnl(dev->rx_handler_data) : NULL; +} + struct net_bridge { spinlock_t lock; spinlock_t hash_lock; @@ -530,6 +538,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ enum br_pkt_type { @@ -854,10 +864,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) return vg->pvid; } -static inline int br_vlan_enabled(struct net_bridge *br) -{ - return br->vlan_enabled; -} #else static inline bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, @@ -945,11 +951,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) return 0; } -static inline int br_vlan_enabled(struct net_bridge *br) -{ - return 0; -} - static inline int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { @@ -1085,6 +1086,11 @@ void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb); bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb); +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask); +void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, + int type); #else static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) { @@ -1101,6 +1107,18 @@ static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, { return true; } + +static inline int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + return 0; +} + +static inline void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ +} #endif /* CONFIG_NET_SWITCHDEV */ #endif diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 5881fbc114a9..1b75d6bf12bd 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -50,7 +50,7 @@ static void 
br_send_bpdu(struct net_bridge_port *p, skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); - memcpy(__skb_put(skb, length), data, length); + __skb_put_data(skb, data, length); llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, LLC_SAP_BSPAN, LLC_PDU_CMD); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 6f12a5271219..89110319ef0f 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char *arg) static void br_stp_start(struct net_bridge *br) { - struct net_bridge_port *p; int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) @@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br) if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); - - /* Stop hello and hold timers */ - del_timer(&br->hello_timer); - list_for_each_entry(p, &br->port_list, list) - del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -189,7 +183,6 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - struct net_bridge_port *p; int err; if (br->stp_enabled == BR_USER_STP) { @@ -198,10 +191,6 @@ static void br_stp_stop(struct net_bridge *br) br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ - mod_timer(&br->hello_timer, jiffies + br->hello_time); - list_for_each_entry(p, &br->port_list, list) - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index f4097b900de1..181a44d0f1da 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -55,3 +55,79 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, return !skb->offload_fwd_mark || BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; } + +/* Flags that can be offloaded to hardware */ +#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ + BR_MCAST_FLOOD | BR_BCAST_FLOOD) + +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + }; + int err; + + if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD) + return 0; + + err = switchdev_port_attr_get(p->dev, &attr); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + /* Check if specific bridge flag attribute offload is supported */ + if (!(attr.u.brport_flags_support & mask)) { + br_warn(p->br, "bridge flag offload is not supported %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return -EOPNOTSUPP; + } + + attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS; + attr.flags = SWITCHDEV_F_DEFER; + attr.u.brport_flags = flags; + err = switchdev_port_attr_set(p->dev, &attr); + if (err) { + br_warn(p->br, "error setting offload flag on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return err; + } + + return 0; +} + +static void +br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, + u16 vid, struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + info.addr = mac; + info.vid = vid; + notifier_type = adding ? 
SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; + call_switchdev_notifiers(notifier_type, dev, &info.info); +} + +void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ + if (!fdb->added_by_user) + return; + + switch (type) { + case RTM_DELNEIGH: + br_switchdev_fdb_call_notifiers(false, fdb->addr.addr, + fdb->vlan_id, + fdb->dst->dev); + break; + case RTM_NEWNEIGH: + br_switchdev_fdb_call_notifiers(true, fdb->addr.addr, + fdb->vlan_id, + fdb->dst->dev); + break; + } +} diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0b5dd607444c..723f25eed8ea 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -865,7 +865,7 @@ static struct attribute *bridge_attrs[] = { NULL }; -static struct attribute_group bridge_group = { +static const struct attribute_group bridge_group = { .name = SYSFS_BRIDGE_ATTR, .attrs = bridge_attrs, }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index b838213c408e..233a30040c91 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -158,7 +158,7 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid if (WARN_ON(!masterv)) return NULL; } - atomic_inc(&masterv->refcnt); + refcount_inc(&masterv->refcnt); return masterv; } @@ -182,7 +182,7 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv) return; vg = br_vlan_group(masterv->br); - if (atomic_dec_and_test(&masterv->refcnt)) { + if (refcount_dec_and_test(&masterv->refcnt)) { rhashtable_remove_fast(&vg->vlan_hash, &masterv->vnode, br_vlan_rht_params); __vlan_del_list(masterv); @@ -573,7 +573,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) br_err(br, "failed insert local address into bridge forwarding table\n"); return ret; } - atomic_inc(&vlan->refcnt); + refcount_inc(&vlan->refcnt); vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; } @@ -595,7 +595,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; vlan->br = br; if (flags & BRIDGE_VLAN_INFO_BRENTRY) - atomic_set(&vlan->refcnt, 1); + refcount_set(&vlan->refcnt, 1); ret = __vlan_add(vlan, flags); if (ret) { free_percpu(vlan->stats); @@ -706,6 +706,14 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) return __br_vlan_filter_toggle(br, val); } +bool br_vlan_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return !!br->vlan_enabled; +} +EXPORT_SYMBOL_GPL(br_vlan_enabled); + int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index e0bb624c3845..dfc86a0199da 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; - if (INVALID_TARGET) + if (ebt_invalid_target(info->target)) return -EINVAL; return 0; } diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 66697cbd0a8b..19f0f9592d32 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par) tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + if (ebt_invalid_target(tmp)) return -EINVAL; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != 
MARK_SET_VALUE && tmp != MARK_OR_VALUE && diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 8d2a85e0594e..a7223eaf490b 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; - if (INVALID_TARGET) + if (ebt_invalid_target(info->target)) return -EINVAL; return 0; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index e56ccd060d26..11cf9e9e9222 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par) if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + if (ebt_invalid_target(tmp)) return -EINVAL; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 346ef6b00b8f..eaf05de37f75 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -28,7 +28,7 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, { struct ethhdr *eth; - eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + eth = skb_push(nskb, ETH_HLEN); skb_reset_mac_header(nskb); ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); @@ -107,11 +107,10 @@ static void nft_reject_br_send_v4_unreach(struct net *net, struct iphdr *niph; struct icmphdr *icmph; unsigned int len; - void *payload; __wsum csum; u8 proto; - if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) + if (!nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. 
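/*
 * Sidebar: the ebt_* checkentry fixes above fold two spellings of the
 * same verdict-range test (the INVALID_TARGET macro and the open-coded
 * "tmp < -NUM_STANDARD_TARGETS || tmp >= 0") into one shared helper. A
 * sketch, assuming ebt_invalid_target() lives where the ebtables
 * headers put it:
 */
#include <linux/netfilter_bridge/ebtables.h>

static int example_tg_check(int target)
{
        /* was: if (target < -NUM_STANDARD_TARGETS || target >= 0) */
        if (ebt_invalid_target(target))
                return -EINVAL;
        return 0;
}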
*/ @@ -147,13 +146,11 @@ static void nft_reject_br_send_v4_unreach(struct net *net, net->ipv4.sysctl_ip_default_ttl); skb_reset_transport_header(nskb); - icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); - memset(icmph, 0, sizeof(*icmph)); + icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); icmph->type = ICMP_DEST_UNREACH; icmph->code = code; - payload = skb_put(nskb, len); - memcpy(payload, skb_network_header(oldskb), len); + skb_put_data(nskb, skb_network_header(oldskb), len); csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); icmph->checksum = csum_fold(csum); @@ -226,9 +223,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) __be16 fo; u8 proto = ip6h->nexthdr; - if (skb->csum_bad) - return false; - if (skb_csum_unnecessary(skb)) return true; @@ -252,7 +246,6 @@ static void nft_reject_br_send_v6_unreach(struct net *net, struct ipv6hdr *nip6h; struct icmp6hdr *icmp6h; unsigned int len; - void *payload; if (!nft_bridge_ip6hdr_validate(oldskb)) return; @@ -278,13 +271,11 @@ static void nft_reject_br_send_v6_unreach(struct net *net, net->ipv6.devconf_all->hop_limit); skb_reset_transport_header(nskb); - icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); - memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; icmp6h->icmp6_code = code; - payload = skb_put(nskb, len); - memcpy(payload, skb_network_header(oldskb), len); + skb_put_data(nskb, skb_network_header(oldskb), len); nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); icmp6h->icmp6_cksum = diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 21f18ea2fce4..632d5a416d97 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = { static void caif_sock_destructor(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - caif_assert(!atomic_read(&sk->sk_wmem_alloc)); + caif_assert(!refcount_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { @@ -1103,7 +1103,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, } -static struct net_proto_family caif_family_ops = { +static const struct net_proto_family caif_family_ops = { .family = PF_CAIF, .create = caif_create, .owner = THIS_MODULE, diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index fe3c53efb949..922ac1d605b3 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -461,7 +461,8 @@ static void caif_netlink_parms(struct nlattr *data[], } static int ipcaif_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { int ret; struct chnl_net *caifdev; @@ -484,7 +485,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct chnl_net *caifdev; ASSERT_RTNL(); diff --git a/net/can/bcm.c b/net/can/bcm.c index 65432633a250..47a8748d953a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -282,7 +282,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; - memcpy(skb_put(skb, op->cfsiz), cf, op->cfsiz); + skb_put_data(skb, cf, op->cfsiz); /* send with loopback 
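/*
 * Sidebar: skb_put_zero() in the nft_reject hunks above merges the
 * skb_put() + memset() pair and hands back the already-zeroed area. A
 * sketch with a hypothetical header type:
 */
#include <linux/skbuff.h>

struct example_zhdr {                   /* hypothetical */
        u8 type;
        u8 code;
        __sum16 checksum;
};

static void example_put_zeroed(struct sk_buff *skb)
{
        struct example_zhdr *h;

        /* was: h = skb_put(skb, sizeof(*h)); memset(h, 0, sizeof(*h)); */
        h = skb_put_zero(skb, sizeof(*h));
        h->type = 3;                    /* the other fields stay zero */
}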
*/ skb->dev = dev; @@ -318,13 +318,13 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, if (!skb) return; - memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head)); + skb_put_data(skb, head, sizeof(*head)); if (head->nframes) { /* CAN frames starting here */ firstframe = (struct canfd_frame *)skb_tail_pointer(skb); - memcpy(skb_put(skb, datalen), frames, datalen); + skb_put_data(skb, frames, datalen); /* * the BCM uses the flags-element of the canfd_frame diff --git a/net/core/datagram.c b/net/core/datagram.c index f9653987c0f9..6877c43cc92d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -161,6 +161,45 @@ done: return skb; } +struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), + int *peeked, int *off, int *err, + struct sk_buff **last) +{ + struct sk_buff *skb; + int _off = *off; + + *last = queue->prev; + skb_queue_walk(queue, skb) { + if (flags & MSG_PEEK) { + if (_off >= skb->len && (skb->len || _off || + skb->peeked)) { + _off -= skb->len; + continue; + } + if (!skb->len) { + skb = skb_set_peeked(skb); + if (unlikely(IS_ERR(skb))) { + *err = PTR_ERR(skb); + return NULL; + } + } + *peeked = 1; + refcount_inc(&skb->users); + } else { + __skb_unlink(skb, queue); + if (destructor) + destructor(sk, skb); + } + *off = _off; + return skb; + } + return NULL; +} + /** * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ - int _off = *off; - - *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); - skb_queue_walk(queue, skb) { - *last = skb; - if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { - _off -= skb->len; - continue; - } - if (!skb->len) { - skb = skb_set_peeked(skb); - if (IS_ERR(skb)) { - error = PTR_ERR(skb); - spin_unlock_irqrestore(&queue->lock, - cpu_flags); - goto no_packet; - } - } - *peeked = 1; - atomic_inc(&skb->users); - } else { - __skb_unlink(skb, queue); - if (destructor) - destructor(sk, skb); - } - spin_unlock_irqrestore(&queue->lock, cpu_flags); - *off = _off; - return skb; - } - + skb = __skb_try_recv_from_queue(sk, queue, flags, destructor, + peeked, off, &error, last); spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (error) + goto no_packet; + if (skb) + return skb; if (!sk_can_busy_loop(sk)) break; @@ -317,9 +330,7 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) { + if (!skb_unref(skb)) { sk_peek_offset_bwd(sk, len); return; } @@ -335,8 +346,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) } EXPORT_SYMBOL(__skb_free_datagram_locked); -int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags, +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, + struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)) { @@ -344,15 +355,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, if (flags & MSG_PEEK) { err = -ENOENT; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, 
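/*
 * Sidebar: the __skb_free_datagram_locked() hunk above replaces the
 * open-coded "users == 1, else dec-and-test" dance with skb_unref(),
 * which keeps the single-owner fast path and returns true only to the
 * caller that dropped the last reference. A sketch of a free helper in
 * the same shape (kfree_skb() itself is built this way):
 */
#include <linux/skbuff.h>

static void example_consume(struct sk_buff *skb)
{
        if (!skb_unref(skb))
                return;                 /* other references remain */

        __kfree_skb(skb);               /* last reference: release */
}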
&sk->sk_receive_queue); - atomic_dec(&skb->users); + spin_lock_bh(&sk_queue->lock); + if (skb == skb_peek(sk_queue)) { + __skb_unlink(skb, sk_queue); + refcount_dec(&skb->users); if (destructor) destructor(sk, skb); err = 0; } - spin_unlock_bh(&sk->sk_receive_queue.lock); + spin_unlock_bh(&sk_queue->lock); } atomic_inc(&sk->sk_drops); @@ -383,7 +394,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = __sk_queue_drop_skb(sk, skb, flags, NULL); + int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags, + NULL); kfree_skb(skb); sk_mem_reclaim_partial(sk); @@ -602,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - atomic_add(truesize, &skb->sk->sk_wmem_alloc); + refcount_add(truesize, &skb->sk->sk_wmem_alloc); while (copied) { int size = min_t(int, copied, PAGE_SIZE - start); skb_fill_page_desc(skb, frag++, pages[n], start, size); diff --git a/net/core/dev.c b/net/core/dev.c index 416137c64bf8..7098fba52be1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -105,6 +105,7 @@ #include <net/dst.h> #include <net/dst_metadata.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/checksum.h> #include <net/xfrm.h> #include <linux/highmem.h> @@ -142,6 +143,7 @@ #include <linux/hrtimer.h> #include <linux/netfilter_ingress.h> #include <linux/crash_dump.h> +#include <linux/sctp.h> #include "net-sysfs.h" @@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, struct netdev_notifier_info *info); +static struct napi_struct *napi_by_id(unsigned int napi_id); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** + * dev_get_by_napi_id - find a device by napi_id + * @napi_id: ID of the NAPI struct + * + * Search for an interface by NAPI ID. Returns %NULL if the device + * is not found or a pointer to the device. The device has not had + * its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_napi_id(unsigned int napi_id) +{ + struct napi_struct *napi; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (napi_id < MIN_NAPI_ID) + return NULL; + + napi = napi_by_id(napi_id); + + return napi ? napi->dev : NULL; +} +EXPORT_SYMBOL(dev_get_by_napi_id); + +/** * netdev_get_name - get a netdevice name, knowing its ifindex. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. 
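/*
 * dev_get_by_napi_id() above maps a busy-poll NAPI ID back to its net
 * device without taking a reference, so the caller must stay inside an RCU
 * read-side section for as long as it touches the result. A usage sketch
 * (hypothetical caller):
 */
#include <linux/netdevice.h>
#include <linux/rcupdate.h>

static int example_ifindex_from_napi_id(unsigned int napi_id)
{
	struct net_device *dev;
	int ifindex = -ENODEV;

	rcu_read_lock();
	dev = dev_get_by_napi_id(napi_id);
	if (dev)
		ifindex = dev->ifindex;	/* only valid under rcu_read_lock() */
	rcu_read_unlock();

	return ifindex;
}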
@@ -1834,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb, { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) return -ENOMEM; - atomic_inc(&skb->users); + refcount_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } @@ -2456,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) if (unlikely(!skb)) return; - if (likely(atomic_read(&skb->users) == 1)) { + if (likely(refcount_read(&skb->users) == 1)) { smp_rmb(); - atomic_set(&skb->users, 0); - } else if (likely(!atomic_dec_and_test(&skb->users))) { + refcount_set(&skb->users, 0); + } else if (likely(!refcount_dec_and_test(&skb->users))) { return; } get_kfree_skb_cb(skb)->reason = reason; @@ -2612,6 +2640,47 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +int skb_crc32c_csum_help(struct sk_buff *skb) +{ + __le32 crc32c_csum; + int ret = 0, offset, start; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + goto out; + + if (unlikely(skb_is_gso(skb))) + goto out; + + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. + */ + if (unlikely(skb_has_shared_frag(skb))) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + start = skb_checksum_start_offset(skb); + offset = start + offsetof(struct sctphdr, checksum); + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { + ret = -EINVAL; + goto out; + } + if (skb_cloned(skb) && + !skb_clone_writable(skb, offset + sizeof(__le32))) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (ret) + goto out; + } + crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, + skb->len - start, ~(__u32)0, + crc32c_csum_stub)); + *(__le32 *)(skb->data + offset) = crc32c_csum; + skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; +out: + return ret; +} + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; @@ -2954,6 +3023,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, return skb; } +int skb_csum_hwoffload_help(struct sk_buff *skb, + const netdev_features_t features) +{ + if (unlikely(skb->csum_not_inet)) + return !!(features & NETIF_F_SCTP_CRC) ? 0 : + skb_crc32c_csum_help(skb); + + return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb); +} +EXPORT_SYMBOL(skb_csum_hwoffload_help); + static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2992,8 +3072,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_CSUM_MASK) && - skb_checksum_help(skb)) + if (skb_csum_hwoffload_help(skb, features)) goto out_kfree_skb; } } @@ -3179,7 +3258,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. 
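/*
 * skb_csum_hwoffload_help() above centralizes the "can the NIC finish this
 * checksum, or must software do it now?" decision: skb->csum_not_inet
 * selects the new CRC32c (SCTP) path, gated on NETIF_F_SCTP_CRC, while the
 * common case remains the Internet checksum path, gated on
 * NETIF_F_CSUM_MASK. The xmit-path call mirrors the validate_xmit_skb()
 * hunk above (sketch, hypothetical labels):
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *example_validate(struct sk_buff *skb,
					netdev_features_t features)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_csum_hwoffload_help(skb, features))
		goto out_kfree_skb;	/* software fallback failed too */

	return skb;

out_kfree_skb:
	kfree_skb(skb);
	return NULL;
}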
*/ qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -3191,6 +3270,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *ret = NET_XMIT_SUCCESS; consume_skb(skb); return NULL; @@ -3875,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; - WARN_ON(atomic_read(&skb->users)); + WARN_ON(refcount_read(&skb->users)); if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else @@ -3949,7 +4029,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -3960,6 +4040,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: consume_skb(skb); return NULL; case TC_ACT_REDIRECT: @@ -4261,13 +4342,12 @@ static struct static_key generic_xdp_needed __read_mostly; static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) { + struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); struct bpf_prog *new = xdp->prog; int ret = 0; switch (xdp->command) { - case XDP_SETUP_PROG: { - struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); - + case XDP_SETUP_PROG: rcu_assign_pointer(dev->xdp_prog, new); if (old) bpf_prog_put(old); @@ -4279,10 +4359,10 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) dev_disable_lro(dev); } break; - } case XDP_QUERY_PROG: - xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog); + xdp->prog_attached = !!old; + xdp->prog_id = old ? old->aux->id : 0; break; default: @@ -4637,9 +4717,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (netif_elide_gro(skb->dev)) goto normal; - if (skb->csum_bad) - goto normal; - gro_list_prepare(napi, skb); rcu_read_lock(); @@ -6867,7 +6944,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) +u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) { struct netdev_xdp xdp; @@ -6876,18 +6953,25 @@ bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) /* Query must always succeed. 
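/*
 * The generic_xdp_install() hunk above hoists the rtnl_dereference() of
 * the old program out of the switch so XDP_QUERY_PROG can report both the
 * attachment state and the new per-program ID. A driver answering the same
 * query would look roughly like this (sketch; the priv layout is
 * hypothetical):
 */
#include <linux/netdevice.h>
#include <linux/bpf.h>

struct example_priv {
	struct bpf_prog *xdp_prog;	/* written under rtnl */
};

static int example_xdp_query(struct net_device *dev, struct netdev_xdp *xdp)
{
	struct example_priv *priv = netdev_priv(dev);
	struct bpf_prog *prog = priv->xdp_prog;

	xdp->prog_attached = !!prog;
	xdp->prog_id = prog ? prog->aux->id : 0;

	return 0;	/* queries must always succeed */
}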
*/ WARN_ON(xdp_op(dev, &xdp) < 0); + if (prog_id) + *prog_id = xdp.prog_id; + return xdp.prog_attached; } static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, - struct netlink_ext_ack *extack, + struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { struct netdev_xdp xdp; memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_SETUP_PROG; + if (flags & XDP_FLAGS_HW_MODE) + xdp.command = XDP_SETUP_PROG_HW; + else + xdp.command = XDP_SETUP_PROG; xdp.extack = extack; + xdp.flags = flags; xdp.prog = prog; return xdp_op(dev, &xdp); @@ -6913,7 +6997,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, ASSERT_RTNL(); xdp_op = xdp_chk = ops->ndo_xdp; - if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) + if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; @@ -6921,10 +7005,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, xdp_chk = generic_xdp_install; if (fd >= 0) { - if (xdp_chk && __dev_xdp_attached(dev, xdp_chk)) + if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, xdp_op)) + __dev_xdp_attached(dev, xdp_op, NULL)) return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); @@ -6932,7 +7016,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return PTR_ERR(prog); } - err = dev_xdp_install(dev, xdp_op, extack, prog); + err = dev_xdp_install(dev, xdp_op, extack, flags, prog); if (err < 0 && prog) bpf_prog_put(prog); @@ -7023,7 +7107,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL); /* @@ -7751,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, { #if BITS_PER_LONG == 64 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*stats64)); + memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + sizeof(*netdev_stats), 0, sizeof(*stats64) - sizeof(*netdev_stats)); @@ -8608,7 +8692,6 @@ static int __init net_dev_init(void) rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead", NULL, dev_cpu_dead); WARN_ON(rc < 0); - dst_subsys_init(); rc = 0; out: return rc; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 27fad31784a8..82fd4c9c4a1b 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: rx_filter_valid = 1; break; } diff --git a/net/core/dst.c b/net/core/dst.c index 13ba4a090c41..00aa972ad1a1 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -42,108 +42,6 @@ * to dirty as few cache lines as possible in __dst_free(). * As this is not a very strong hint, we dont force an alignment on SMP. 
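/*
 * The netdev_stats_to_stats64() hunk above is a read-overrun fix: struct
 * rtnl_link_stats64 has grown counters that struct net_device_stats lacks,
 * so copying sizeof(*stats64) bytes read past the end of the source. The
 * fixed pattern copies only the smaller struct and zeroes the tail
 * (essentially the function as patched; assumes the 64-bit layout where
 * both structs share a common prefix):
 */
#include <linux/netdevice.h>
#include <linux/string.h>

static void example_stats_to_stats64(struct rtnl_link_stats64 *stats64,
				     const struct net_device_stats *netdev_stats)
{
	BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
	memset((char *)stats64 + sizeof(*netdev_stats), 0,
	       sizeof(*stats64) - sizeof(*netdev_stats));
}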
*/ -static struct { - spinlock_t lock; - struct dst_entry *list; - unsigned long timer_inc; - unsigned long timer_expires; -} dst_garbage = { - .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock), - .timer_inc = DST_GC_MAX, -}; -static void dst_gc_task(struct work_struct *work); -static void ___dst_free(struct dst_entry *dst); - -static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); - -static DEFINE_MUTEX(dst_gc_mutex); -/* - * long lived entries are maintained in this list, guarded by dst_gc_mutex - */ -static struct dst_entry *dst_busy_list; - -static void dst_gc_task(struct work_struct *work) -{ - int delayed = 0; - int work_performed = 0; - unsigned long expires = ~0L; - struct dst_entry *dst, *next, head; - struct dst_entry *last = &head; - - mutex_lock(&dst_gc_mutex); - next = dst_busy_list; - -loop: - while ((dst = next) != NULL) { - next = dst->next; - prefetch(&next->next); - cond_resched(); - if (likely(atomic_read(&dst->__refcnt))) { - last->next = dst; - last = dst; - delayed++; - continue; - } - work_performed++; - - dst = dst_destroy(dst); - if (dst) { - /* NOHASH and still referenced. Unless it is already - * on gc list, invalidate it and add to gc list. - * - * Note: this is temporary. Actually, NOHASH dst's - * must be obsoleted when parent is obsoleted. - * But we do not have state "obsoleted, but - * referenced by parent", so it is right. - */ - if (dst->obsolete > 0) - continue; - - ___dst_free(dst); - dst->next = next; - next = dst; - } - } - - spin_lock_bh(&dst_garbage.lock); - next = dst_garbage.list; - if (next) { - dst_garbage.list = NULL; - spin_unlock_bh(&dst_garbage.lock); - goto loop; - } - last->next = NULL; - dst_busy_list = head.next; - if (!dst_busy_list) - dst_garbage.timer_inc = DST_GC_MAX; - else { - /* - * if we freed less than 1/10 of delayed entries, - * we can sleep longer. - */ - if (work_performed <= delayed/10) { - dst_garbage.timer_expires += dst_garbage.timer_inc; - if (dst_garbage.timer_expires > DST_GC_MAX) - dst_garbage.timer_expires = DST_GC_MAX; - dst_garbage.timer_inc += DST_GC_INC; - } else { - dst_garbage.timer_inc = DST_GC_INC; - dst_garbage.timer_expires = DST_GC_MIN; - } - expires = dst_garbage.timer_expires; - /* - * if the next desired timer is more than 4 seconds in the - * future then round the timer to whole seconds - */ - if (expires > 4*HZ) - expires = round_jiffies_relative(expires); - schedule_delayed_work(&dst_gc_work, expires); - } - - spin_unlock_bh(&dst_garbage.lock); - mutex_unlock(&dst_gc_mutex); -} - int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); @@ -216,41 +114,12 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, } EXPORT_SYMBOL(dst_alloc); -static void ___dst_free(struct dst_entry *dst) -{ - /* The first case (dev==NULL) is required, when - protocol module is unloaded. 
- */ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { - dst->input = dst_discard; - dst->output = dst_discard_out; - } - dst->obsolete = DST_OBSOLETE_DEAD; -} - -void __dst_free(struct dst_entry *dst) -{ - spin_lock_bh(&dst_garbage.lock); - ___dst_free(dst); - dst->next = dst_garbage.list; - dst_garbage.list = dst; - if (dst_garbage.timer_inc > DST_GC_INC) { - dst_garbage.timer_inc = DST_GC_INC; - dst_garbage.timer_expires = DST_GC_MIN; - mod_delayed_work(system_wq, &dst_gc_work, - dst_garbage.timer_expires); - } - spin_unlock_bh(&dst_garbage.lock); -} -EXPORT_SYMBOL(__dst_free); - struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; smp_rmb(); -again: child = dst->child; if (!(dst->flags & DST_NOCOUNT)) @@ -269,20 +138,8 @@ again: kmem_cache_free(dst->ops->kmem_cachep, dst); dst = child; - if (dst) { - int nohash = dst->flags & DST_NOHASH; - - if (atomic_dec_and_test(&dst->__refcnt)) { - /* We were real parent of this dst, so kill child. */ - if (nohash) - goto again; - } else { - /* Child is still referenced, return it for freeing. */ - if (nohash) - return dst; - /* Child is still in his hash table */ - } - } + if (dst) + dst_release_immediate(dst); return NULL; } EXPORT_SYMBOL(dst_destroy); @@ -292,26 +149,62 @@ static void dst_destroy_rcu(struct rcu_head *head) struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); dst = dst_destroy(dst); - if (dst) - __dst_free(dst); } +/* Operations to mark dst as DEAD and clean up the net device referenced + * by dst: + * 1. put the dst under loopback interface and discard all tx/rx packets + * on this route. + * 2. release the net_device + * This function should be called when removing routes from the fib tree + * in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to + * make the next dst_ops->check() fail. 
+ */ +void dst_dev_put(struct dst_entry *dst) +{ + struct net_device *dev = dst->dev; + + dst->obsolete = DST_OBSOLETE_DEAD; + if (dst->ops->ifdown) + dst->ops->ifdown(dst, dev, true); + dst->input = dst_discard; + dst->output = dst_discard_out; + dst->dev = dev_net(dst->dev)->loopback_dev; + dev_hold(dst->dev); + dev_put(dev); +} +EXPORT_SYMBOL(dst_dev_put); + void dst_release(struct dst_entry *dst) { if (dst) { int newrefcnt; - unsigned short nocache = dst->flags & DST_NOCACHE; newrefcnt = atomic_dec_return(&dst->__refcnt); if (unlikely(newrefcnt < 0)) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt && unlikely(nocache)) + if (!newrefcnt) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); +void dst_release_immediate(struct dst_entry *dst) +{ + if (dst) { + int newrefcnt; + + newrefcnt = atomic_dec_return(&dst->__refcnt); + if (unlikely(newrefcnt < 0)) + net_warn_ratelimited("%s: dst:%p refcnt:%d\n", + __func__, dst, newrefcnt); + if (!newrefcnt) + dst_destroy(dst); + } +} +EXPORT_SYMBOL(dst_release_immediate); + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); @@ -371,21 +264,25 @@ static int dst_md_discard(struct sk_buff *skb) return 0; } -static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen) +static void __metadata_dst_init(struct metadata_dst *md_dst, + enum metadata_type type, u8 optslen) + { struct dst_entry *dst; dst = &md_dst->dst; dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE, - DST_METADATA | DST_NOCACHE | DST_NOCOUNT); + DST_METADATA | DST_NOCOUNT); dst->input = dst_md_discard; dst->output = dst_md_discard_out; memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); + md_dst->type = type; } -struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags) +struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, + gfp_t flags) { struct metadata_dst *md_dst; @@ -393,7 +290,7 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags) if (!md_dst) return NULL; - __metadata_dst_init(md_dst, optslen); + __metadata_dst_init(md_dst, type, optslen); return md_dst; } @@ -407,7 +304,8 @@ void metadata_dst_free(struct metadata_dst *md_dst) kfree(md_dst); } -struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) +struct metadata_dst __percpu * +metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) { int cpu; struct metadata_dst __percpu *md_dst; @@ -418,91 +316,8 @@ struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) return NULL; for_each_possible_cpu(cpu) - __metadata_dst_init(per_cpu_ptr(md_dst, cpu), optslen); + __metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen); return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); - -/* Dirty hack. We did it in 2.2 (in __dst_free), - * we have _very_ good reasons not to repeat - * this mistake in 2.3, but we have no choice - * now. _It_ _is_ _explicit_ _deliberate_ - * _race_ _condition_. - * - * Commented and originally written by Alexey. 
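/*
 * With the dst garbage collector gone, a dst's life now ends when its
 * refcount reaches zero: dst_release() defers dst_destroy() through an RCU
 * grace period because readers may still hold RCU-protected pointers,
 * while the new dst_release_immediate() frees at once and is only safe for
 * a dst that was never made visible to the data path. Sketch of the choice
 * (hypothetical caller):
 */
#include <net/dst.h>

static void example_drop_dst(struct dst_entry *dst, bool published)
{
	if (published)
		dst_release(dst);		/* RCU-deferred dst_destroy() */
	else
		dst_release_immediate(dst);	/* never escaped: free now */
}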
- */ -static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, - int unregister) -{ - if (dst->ops->ifdown) - dst->ops->ifdown(dst, dev, unregister); - - if (dev != dst->dev) - return; - - if (!unregister) { - dst->input = dst_discard; - dst->output = dst_discard_out; - } else { - dst->dev = dev_net(dst->dev)->loopback_dev; - dev_hold(dst->dev); - dev_put(dev); - } -} - -static int dst_dev_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct dst_entry *dst, *last = NULL; - - switch (event) { - case NETDEV_UNREGISTER_FINAL: - case NETDEV_DOWN: - mutex_lock(&dst_gc_mutex); - for (dst = dst_busy_list; dst; dst = dst->next) { - last = dst; - dst_ifdown(dst, dev, event != NETDEV_DOWN); - } - - spin_lock_bh(&dst_garbage.lock); - dst = dst_garbage.list; - dst_garbage.list = NULL; - /* The code in dst_ifdown places a hold on the loopback device. - * If the gc entry processing is set to expire after a lengthy - * interval, this hold can cause netdev_wait_allrefs() to hang - * out and wait for a long time -- until the the loopback - * interface is released. If we're really unlucky, it'll emit - * pr_emerg messages to console too. Reset the interval here, - * so dst cleanups occur in a more timely fashion. - */ - if (dst_garbage.timer_inc > DST_GC_INC) { - dst_garbage.timer_inc = DST_GC_INC; - dst_garbage.timer_expires = DST_GC_MIN; - mod_delayed_work(system_wq, &dst_gc_work, - dst_garbage.timer_expires); - } - spin_unlock_bh(&dst_garbage.lock); - - if (last) - last->next = dst; - else - dst_busy_list = dst; - for (; dst; dst = dst->next) - dst_ifdown(dst, dev, event != NETDEV_DOWN); - mutex_unlock(&dst_gc_mutex); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block dst_dev_notifier = { - .notifier_call = dst_dev_event, - .priority = -10, /* must be called after other network notifiers */ -}; - -void __init dst_subsys_init(void) -{ - register_netdevice_notifier(&dst_dev_notifier); -} diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3bba291c6c32..a0093e1b0235 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -46,7 +46,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, if (r == NULL) return -ENOMEM; - atomic_set(&r->refcnt, 1); + refcount_set(&r->refcnt, 1); r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; @@ -283,7 +283,7 @@ jumped: if (err != -EAGAIN) { if ((arg->flags & FIB_LOOKUP_NOREF) || - likely(atomic_inc_not_zero(&rule->refcnt))) { + likely(refcount_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } @@ -517,7 +517,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, last = r; } - fib_rule_get(rule); + refcount_set(&rule->refcnt, 1); if (last) list_add_rcu(&rule->list, &last->list); diff --git a/net/core/filter.c b/net/core/filter.c index a6bb95fa87b2..c7f737058d89 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -54,6 +54,7 @@ #include <net/dst.h> #include <net/sock_reuseport.h> #include <net/busy_poll.h> +#include <net/tcp.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -352,7 +353,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * bpf_convert_filter - convert filter program * @prog: the user passed filter program * @len: the length of the user passed filter program - * @new_prog: buffer where converted program will be stored + * @new_prog: allocated 'struct bpf_prog' or NULL * @new_len: pointer to store length of converted program * * Remap 'sock_filter' style 
classic BPF (cBPF) instruction set to 'bpf_insn' @@ -364,14 +365,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); */ static int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) + struct bpf_prog *new_prog, int *new_len) { - int new_flen = 0, pass = 0, target, i; - struct bpf_insn *new_insn; + int new_flen = 0, pass = 0, target, i, stack_off; + struct bpf_insn *new_insn, *first_insn = NULL; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; @@ -383,6 +383,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, return -EINVAL; if (new_prog) { + first_insn = new_prog->insnsi; addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL | __GFP_NOWARN); if (!addrs) @@ -390,11 +391,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, } do_pass: - new_insn = new_prog; + new_insn = first_insn; fp = prog; /* Classic BPF related prologue emission. */ - if (new_insn) { + if (new_prog) { /* Classic BPF expects A and X to be reset first. These need * to be guaranteed to be the first two instructions. */ @@ -415,7 +416,7 @@ do_pass: struct bpf_insn *insn = tmp_insns; if (addrs) - addrs[i] = new_insn - new_prog; + addrs[i] = new_insn - first_insn; switch (fp->code) { /* All arithmetic insns and skb loads map as-is. */ @@ -561,17 +562,25 @@ do_pass: /* Store to stack. */ case BPF_ST: case BPF_STX: + stack_off = fp->k * 4 + 4; *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == BPF_ST ? BPF_REG_A : BPF_REG_X, - -(BPF_MEMWORDS - fp->k) * 4); + -stack_off); + /* check_load_and_stores() verifies that classic BPF can + * load from stack only after write, so tracking + * stack_depth for ST|STX insns is enough + */ + if (new_prog && new_prog->aux->stack_depth < stack_off) + new_prog->aux->stack_depth = stack_off; break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: + stack_off = fp->k * 4 + 4; *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? BPF_REG_A : BPF_REG_X, BPF_REG_FP, - -(BPF_MEMWORDS - fp->k) * 4); + -stack_off); break; /* A = K or X = K */ @@ -619,13 +628,13 @@ do_pass: if (!new_prog) { /* Only calculating new length. */ - *new_len = new_insn - new_prog; + *new_len = new_insn - first_insn; return 0; } pass++; - if (new_flen != new_insn - new_prog) { - new_flen = new_insn - new_prog; + if (new_flen != new_insn - first_insn) { + new_flen = new_insn - first_insn; if (pass > 2) goto err; goto do_pass; @@ -1017,7 +1026,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) fp->len = new_len; /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ - err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + err = bpf_convert_filter(old_prog, old_len, fp, &new_len); if (err) /* 2nd bpf_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, @@ -1866,6 +1875,24 @@ static const struct bpf_func_proto bpf_set_hash_invalid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) +{ + /* Set user specified hash as L4(+), so that it gets returned + * on skb_get_hash() call unless BPF prog later on triggers a + * skb_clear_hash(). 
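/*
 * The bpf_convert_filter() hunks above make the cBPF->eBPF remap record
 * how much stack the converted program really uses. Classic scratch slot
 * M[k] now lands at FP - (k * 4 + 4), so the largest such offset is the
 * frame size a JIT must reserve. Worked example: ST into M[3] yields
 * stack_off = 3 * 4 + 4 = 16, emitting
 *
 *	BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_REG_A, -16);
 *
 * and raising new_prog->aux->stack_depth to at least 16, so a JIT no
 * longer has to assume the full BPF_MEMWORDS * 4 = 64-byte scratch area is
 * live for every converted program.
 */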
+ */ + __skb_set_sw_hash(skb, hash, true); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_proto = { + .func = bpf_set_hash, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { @@ -1985,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_proto_4_to_6(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); - u32 off = skb->network_header - skb->mac_header; + u32 off = skb_mac_header_len(skb); int ret; ret = skb_cow(skb, len_diff); @@ -2021,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) static int bpf_skb_proto_6_to_4(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); - u32 off = skb->network_header - skb->mac_header; + u32 off = skb_mac_header_len(skb); int ret; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2127,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { .arg2_type = ARG_ANYTHING, }; +static u32 bpf_skb_net_base_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + return sizeof(struct iphdr); + case htons(ETH_P_IPV6): + return sizeof(struct ipv6hdr); + default: + return ~0U; + } +} + +static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) +{ + u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + int ret; + + ret = skb_cow(skb, len_diff); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_push(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* Due to header grow, MSS needs to be downgraded. */ + skb_shinfo(skb)->gso_size -= len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) +{ + u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + int ret; + + ret = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_pop(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* Due to header shrink, MSS can be upgraded. */ + skb_shinfo(skb)->gso_size += len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static u32 __bpf_skb_max_len(const struct sk_buff *skb) +{ + return skb->dev->mtu + skb->dev->hard_header_len; +} + +static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) +{ + bool trans_same = skb->transport_header == skb->network_header; + u32 len_cur, len_diff_abs = abs(len_diff); + u32 len_min = bpf_skb_net_base_len(skb); + u32 len_max = __bpf_skb_max_len(skb); + __be16 proto = skb->protocol; + bool shrink = len_diff < 0; + int ret; + + if (unlikely(len_diff_abs > 0xfffU)) + return -EFAULT; + if (unlikely(proto != htons(ETH_P_IP) && + proto != htons(ETH_P_IPV6))) + return -ENOTSUPP; + + len_cur = skb->len - skb_network_offset(skb); + if (skb_transport_header_was_set(skb) && !trans_same) + len_cur = skb_network_header_len(skb); + if ((shrink && (len_diff_abs >= len_cur || + len_cur - len_diff_abs < len_min)) || + (!shrink && (skb->len + len_diff_abs > len_max && + !skb_is_gso(skb)))) + return -ENOTSUPP; + + ret = shrink ? 
bpf_skb_net_shrink(skb, len_diff_abs) : + bpf_skb_net_grow(skb, len_diff_abs); + + bpf_compute_data_end(skb); + return 0; +} + +BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, + u32, mode, u64, flags) +{ + if (unlikely(flags)) + return -EINVAL; + if (likely(mode == BPF_ADJ_ROOM_NET)) + return bpf_skb_adjust_net(skb, len_diff); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_skb_adjust_room_proto = { + .func = bpf_skb_adjust_room, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { u32 min_len = skb_network_offset(skb); @@ -2139,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb) return min_len; } -static u32 __bpf_skb_max_len(const struct sk_buff *skb) -{ - return skb->dev->mtu + skb->dev->hard_header_len; -} - static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) { unsigned int old_len = skb->len; @@ -2280,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_proto || func == bpf_skb_change_head || func == bpf_skb_change_tail || + func == bpf_skb_adjust_room || func == bpf_skb_pull_data || func == bpf_clone_redirect || func == bpf_l3_csum_replace || @@ -2539,6 +2680,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) * that is holding verifier mutex. */ md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, + METADATA_IP_TUNNEL, GFP_KERNEL); if (!md_dst) return NULL; @@ -2645,6 +2787,110 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + struct sock *sk = bpf_sock->sk; + int ret = 0; + int val; + + if (!sk_fullsock(sk)) + return -EINVAL; + + if (level == SOL_SOCKET) { + if (optlen != sizeof(int)) + return -EINVAL; + val = *((int *)optval); + + /* Only some socketops are supported */ + switch (optname) { + case SO_RCVBUF: + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + break; + case SO_SNDBUF: + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + break; + case SO_MAX_PACING_RATE: + sk->sk_max_pacing_rate = val; + sk->sk_pacing_rate = min(sk->sk_pacing_rate, + sk->sk_max_pacing_rate); + break; + case SO_PRIORITY: + sk->sk_priority = val; + break; + case SO_RCVLOWAT: + if (val < 0) + val = INT_MAX; + sk->sk_rcvlowat = val ? 
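/*
 * bpf_skb_adjust_room() above resizes the packet at the network layer in
 * BPF_ADJ_ROOM_NET mode; reading bpf_skb_net_grow(), the gap appears to
 * open just past the fixed IPv4/IPv6 header, with GSO metadata fixed up as
 * shown. A tc/BPF program sketch (hypothetical program, assumes the usual
 * libbpf-style SEC() and helper declarations):
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>

SEC("classifier")
int example_make_room(struct __sk_buff *skb)
{
	const __s32 room = 8;	/* bytes of room to open at L3 */

	if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_NET, 0))
		return TC_ACT_SHOT;	/* over MTU or non-IP protocol */

	/* the program would now fill the gap, e.g. via bpf_skb_store_bytes() */
	return TC_ACT_OK;
}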
: 1; + break; + case SO_MARK: + sk->sk_mark = val; + break; + default: + ret = -EINVAL; + } +#ifdef CONFIG_INET + } else if (level == SOL_TCP && + sk->sk_prot->setsockopt == tcp_setsockopt) { + if (optname == TCP_CONGESTION) { + char name[TCP_CA_NAME_MAX]; + + strncpy(name, optval, min_t(long, optlen, + TCP_CA_NAME_MAX-1)); + name[TCP_CA_NAME_MAX-1] = 0; + ret = tcp_set_congestion_control(sk, name, false); + if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) + /* replacing an existing ca */ + tcp_reinit_congestion_control(sk, + inet_csk(sk)->icsk_ca_ops); + } else { + struct tcp_sock *tp = tcp_sk(sk); + + if (optlen != sizeof(int)) + return -EINVAL; + + val = *((int *)optval); + /* Only some options are supported */ + switch (optname) { + case TCP_BPF_IW: + if (val <= 0 || tp->data_segs_out > 0) + ret = -EINVAL; + else + tp->snd_cwnd = val; + break; + case TCP_BPF_SNDCWND_CLAMP: + if (val <= 0) { + ret = -EINVAL; + } else { + tp->snd_cwnd_clamp = val; + tp->snd_ssthresh = val; + } + break; + default: + ret = -EINVAL; + } + } + ret = -EINVAL; +#endif + } else { + ret = -EINVAL; + } + return ret; +} + +static const struct bpf_func_proto bpf_setsockopt_proto = { + .func = bpf_setsockopt, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2718,6 +2964,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_get_tunnel_key: @@ -2736,6 +2984,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_hash_recalc_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; + case BPF_FUNC_set_hash: + return &bpf_set_hash_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: @@ -2767,12 +3017,6 @@ xdp_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -cg_skb_func_proto(enum bpf_func_id func_id) -{ - return sk_filter_func_proto(func_id); -} - -static const struct bpf_func_proto * lwt_inout_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -2800,6 +3044,17 @@ lwt_inout_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * + sock_ops_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_setsockopt_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * lwt_xmit_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -2834,8 +3089,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static bool __is_valid_access(int off, int size) +static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2844,15 +3102,25 @@ static bool __is_valid_access(int off, int size) return false; switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... 
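/*
 * bpf_setsockopt() above is the first helper for the new
 * BPF_PROG_TYPE_SOCK_OPS programs; note .gpl_only = true and the
 * deliberately short whitelist of SOL_SOCKET/SOL_TCP options. A sock_ops
 * program sketch clamping the congestion window once a connection is
 * established (hypothetical program, assumes the usual helper
 * declarations):
 */
#include <uapi/linux/bpf.h>

SEC("sockops")
int example_sock_ops(struct bpf_sock_ops *skops)
{
	int clamp = 100;	/* snd_cwnd_clamp, in segments */

	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
	    skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
		bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SNDCWND_CLAMP,
			       &clamp, sizeof(clamp));

	return 1;	/* keep default kernel behaviour otherwise */
}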
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - if (off + size > - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32)) + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + if (off + size > offsetofend(struct __sk_buff, cb[4])) return false; break; - default: - if (size != sizeof(__u32)) + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) return false; + break; + default: + /* Only narrow read access allowed for now. */ + if (type == BPF_WRITE) { + if (size != size_default) + return false; + } else { + bpf_ctx_record_field_size(info, size_default); + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) + return false; + } } return true; @@ -2860,43 +3128,41 @@ static bool __is_valid_access(int off, int size) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): - case offsetof(struct __sk_buff, data): - case offsetof(struct __sk_buff, data_end): + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size); + return bpf_skb_is_valid_access(off, size, type, info); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, tc_classid): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; @@ -2904,20 +3170,20 @@ static bool lwt_is_valid_access(int off, int size, } switch (off) { - case offsetof(struct __sk_buff, data): - *reg_type = PTR_TO_PACKET; + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; break; - case offsetof(struct __sk_buff, data_end): - *reg_type = PTR_TO_PACKET_END; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; break; } - return __is_valid_access(off, size); + return bpf_skb_is_valid_access(off, size, type, info); } static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { switch (off) { @@ -2980,16 +3246,15 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, tc_index): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... 
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, tc_index): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; @@ -2997,15 +3262,15 @@ static bool tc_cls_act_is_valid_access(int off, int size, } switch (off) { - case offsetof(struct __sk_buff, data): - *reg_type = PTR_TO_PACKET; + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; break; - case offsetof(struct __sk_buff, data_end): - *reg_type = PTR_TO_PACKET_END; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; break; } - return __is_valid_access(off, size); + return bpf_skb_is_valid_access(off, size, type, info); } static bool __is_valid_xdp_access(int off, int size) @@ -3022,17 +3287,17 @@ static bool __is_valid_xdp_access(int off, int size) static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) return false; switch (off) { case offsetof(struct xdp_md, data): - *reg_type = PTR_TO_PACKET; + info->reg_type = PTR_TO_PACKET; break; case offsetof(struct xdp_md, data_end): - *reg_type = PTR_TO_PACKET_END; + info->reg_type = PTR_TO_PACKET_END; break; } @@ -3045,101 +3310,141 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +static bool __is_valid_sock_ops_access(int off, int size) +{ + if (off < 0 || off >= sizeof(struct bpf_sock_ops)) + return false; + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +static bool sock_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct bpf_sock_ops, op) ... 
+ offsetof(struct bpf_sock_ops, replylong[3]): + break; + default: + return false; + } + } + + return __is_valid_sock_ops_access(off, size); +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct __sk_buff, len): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, len)); + bpf_target_off(struct sk_buff, len, 4, + target_size)); break; case offsetof(struct __sk_buff, protocol): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, protocol)); + bpf_target_off(struct sk_buff, protocol, 2, + target_size)); break; case offsetof(struct __sk_buff, vlan_proto): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, vlan_proto)); + bpf_target_off(struct sk_buff, vlan_proto, 2, + target_size)); break; case offsetof(struct __sk_buff, priority): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); break; case offsetof(struct __sk_buff, ingress_ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, skb_iif)); + bpf_target_off(struct sk_buff, skb_iif, 4, + target_size)); break; case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct net_device, ifindex, 4, + target_size)); break; case offsetof(struct __sk_buff, hash): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, hash)); + bpf_target_off(struct sk_buff, hash, 4, + target_size)); break; case offsetof(struct __sk_buff, mark): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + bpf_target_off(struct sk_buff, mark, 4, + target_size)); break; case offsetof(struct __sk_buff, pkt_type): - return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg, - si->src_reg, insn); + *target_size = 1; + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, + PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); +#endif + break; case offsetof(struct __sk_buff, queue_mapping): - return convert_skb_access(SKF_AD_QUEUE, si->dst_reg, - si->src_reg, 
insn); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, queue_mapping, 2, + target_size)); + break; case offsetof(struct __sk_buff, vlan_present): - return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, - si->dst_reg, si->src_reg, insn); - case offsetof(struct __sk_buff, vlan_tci): - return convert_skb_access(SKF_AD_VLAN_TAG, - si->dst_reg, si->src_reg, insn); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, vlan_tci, 2, + target_size)); + if (si->off == offsetof(struct __sk_buff, vlan_tci)) { + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, + ~VLAN_TAG_PRESENT); + } else { + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); + } + break; case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + offsetofend(struct __sk_buff, cb[4]) - 1: BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data)) % @@ -3165,6 +3470,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct __sk_buff, tc_classid); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, tc_classid); + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -3190,14 +3496,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); #else if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); @@ -3208,10 +3514,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, napi_id): #if defined(CONFIG_NET_RX_BUSY_POLL) - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, napi_id)); + bpf_target_off(struct sk_buff, napi_id, 4, + target_size)); *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else @@ -3226,7 +3531,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -3270,22 +3575,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct 
net_device, ifindex, 4, + target_size)); break; default: - return bpf_convert_ctx_access(type, si, insn_buf, prog); + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); } return insn - insn_buf; @@ -3294,7 +3599,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, static u32 xdp_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -3314,6 +3619,139 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct bpf_sock_ops, op) ... + offsetof(struct bpf_sock_ops, replylong[3]): + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, op)); + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, reply)); + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong)); + off = si->off; + off -= offsetof(struct bpf_sock_ops, op); + off += offsetof(struct bpf_sock_ops_kern, op); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + off); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + off); + break; + + case offsetof(struct bpf_sock_ops, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_family)); + break; + + case offsetof(struct bpf_sock_ops, remote_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_daddr)); + break; + + case offsetof(struct bpf_sock_ops, local_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_rcv_saddr)); + break; + + case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... + offsetof(struct bpf_sock_ops, remote_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct bpf_sock_ops, remote_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_daddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct bpf_sock_ops, local_ip6[0]) ... 
+ offsetof(struct bpf_sock_ops, local_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct bpf_sock_ops, local_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct bpf_sock_ops, remote_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_dport)); +#ifndef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); +#endif + break; + + case offsetof(struct bpf_sock_ops, local_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_num)); + break; + } + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -3336,7 +3774,7 @@ const struct bpf_verifier_ops xdp_prog_ops = { }; const struct bpf_verifier_ops cg_skb_prog_ops = { - .get_func_proto = cg_skb_func_proto, + .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .test_run = bpf_prog_test_run_skb, @@ -3363,6 +3801,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; +const struct bpf_verifier_ops sock_ops_prog_ops = { + .get_func_proto = sock_ops_func_proto, + .is_valid_access = sock_ops_is_valid_access, + .convert_ctx_access = sock_ops_convert_ctx_access, +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 28d94bce4df8..fc5fc4594c90 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -18,6 +18,7 @@ #include <linux/stddef.h> #include <linux/if_ether.h> #include <linux/mpls.h> +#include <linux/tcp.h> #include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> @@ -342,6 +343,64 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, return FLOW_DISSECT_RET_OUT_PROTO_AGAIN; } +static void +__skb_flow_dissect_tcp(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int thoff, int hlen) +{ + struct flow_dissector_key_tcp *key_tcp; + struct tcphdr *th, _th; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP)) + return; + + th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th); + if (!th) + return; + + if (unlikely(__tcp_hdrlen(th) < sizeof(_th))) + return; + + key_tcp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_TCP, + target_container); + key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); +} + +static void +__skb_flow_dissect_ipv4(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void 
*target_container, void *data, const struct iphdr *iph) +{ + struct flow_dissector_key_ip *key_ip; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) + return; + + key_ip = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IP, + target_container); + key_ip->tos = iph->tos; + key_ip->ttl = iph->ttl; +} + +static void +__skb_flow_dissect_ipv6(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, const struct ipv6hdr *iph) +{ + struct flow_dissector_key_ip *key_ip; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) + return; + + key_ip = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IP, + target_container); + key_ip->tos = ipv6_get_dsfield(iph); + key_ip->ttl = iph->hop_limit; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -444,6 +503,9 @@ ip: } } + __skb_flow_dissect_ipv4(skb, flow_dissector, + target_container, data, iph); + if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) goto out_good; @@ -489,6 +551,9 @@ ipv6: goto out_good; } + __skb_flow_dissect_ipv6(skb, flow_dissector, + target_container, data, iph); + if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) goto out_good; @@ -683,6 +748,10 @@ ip_proto_again: case IPPROTO_MPLS: proto = htons(ETH_P_MPLS_UC); goto mpls; + case IPPROTO_TCP: + __skb_flow_dissect_tcp(skb, flow_dissector, target_container, + data, nhoff, hlen); + break; default: break; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index b3bc0a31af9f..1307731ddfe4 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -240,7 +240,8 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { static int bpf_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct nlattr *tb[LWT_BPF_MAX + 1]; struct lwtunnel_state *newts; @@ -250,7 +251,7 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL); + ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index cfae3d5fe11f..d9cb3532f1dd 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -103,37 +103,53 @@ EXPORT_SYMBOL(lwtunnel_encap_del_ops); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, - const void *cfg, struct lwtunnel_state **lws) + const void *cfg, struct lwtunnel_state **lws, + struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; + bool found = false; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || - encap_type > LWTUNNEL_ENCAP_MAX) + encap_type > LWTUNNEL_ENCAP_MAX) { + NL_SET_ERR_MSG_ATTR(extack, encap, + "Unknown LWT encapsulation type"); return ret; + } ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); if (likely(ops && ops->build_state && try_module_get(ops->owner))) { - ret = ops->build_state(encap, family, cfg, lws); + found = true; + ret = ops->build_state(encap, family, cfg, lws, extack); if (ret) module_put(ops->owner); } rcu_read_unlock(); + /* don't rely on -EOPNOTSUPP to detect match as build_state + * handlers could return it + */ + if (!found) { + NL_SET_ERR_MSG_ATTR(extack, encap, + "LWT encapsulation type not supported"); + } + return ret; 
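/*
 * The lwtunnel hunks above thread struct netlink_ext_ack through
 * build_state so a failed route install can point userspace at the exact
 * offending attribute instead of returning a bare errno. The handler-side
 * pattern (hypothetical encap handler):
 */
#include <net/lwtunnel.h>
#include <linux/netlink.h>

static int example_build_state(struct nlattr *encap, unsigned int family,
			       const void *cfg, struct lwtunnel_state **ts,
			       struct netlink_ext_ack *extack)
{
	if (family != AF_INET && family != AF_INET6) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Address family not supported");
		return -EAFNOSUPPORT;
	}

	/* ... parse @encap, allocate and fill *ts ... */
	return 0;
}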
} EXPORT_SYMBOL(lwtunnel_build_state); -int lwtunnel_valid_encap_type(u16 encap_type) +int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || - encap_type > LWTUNNEL_ENCAP_MAX) + encap_type > LWTUNNEL_ENCAP_MAX) { + NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); return ret; + } rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); @@ -153,11 +169,16 @@ int lwtunnel_valid_encap_type(u16 encap_type) } } #endif - return ops ? 0 : -EOPNOTSUPP; + ret = ops ? 0 : -EOPNOTSUPP; + if (ret < 0) + NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); + + return ret; } EXPORT_SYMBOL(lwtunnel_valid_encap_type); -int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, + struct netlink_ext_ack *extack) { struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; @@ -174,7 +195,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) if (nla_entype) { encap_type = nla_get_u16(nla_entype); - if (lwtunnel_valid_encap_type(encap_type) != 0) + if (lwtunnel_valid_encap_type(encap_type, + extack) != 0) return -EOPNOTSUPP; } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d274f81fcc2c..e31fc11a8000 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -118,6 +118,50 @@ unsigned long neigh_rand_reach_time(unsigned long base) EXPORT_SYMBOL(neigh_rand_reach_time); +static bool neigh_del(struct neighbour *n, __u8 state, + struct neighbour __rcu **np, struct neigh_table *tbl) +{ + bool retval = false; + + write_lock(&n->lock); + if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) { + struct neighbour *neigh; + + neigh = rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock)); + rcu_assign_pointer(*np, neigh); + n->dead = 1; + retval = true; + } + write_unlock(&n->lock); + if (retval) + neigh_cleanup_and_release(n); + return retval; +} + +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) +{ + struct neigh_hash_table *nht; + void *pkey = ndel->primary_key; + u32 hash_val; + struct neighbour *n; + struct neighbour __rcu **np; + + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); + hash_val = hash_val >> (32 - nht->hash_shift); + + np = &nht->hash_buckets[hash_val]; + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock)))) { + if (n == ndel) + return neigh_del(n, 0, np, tbl); + np = &n->next; + } + return false; +} + static int neigh_forced_gc(struct neigh_table *tbl) { int shrunk = 0; @@ -140,19 +184,10 @@ static int neigh_forced_gc(struct neigh_table *tbl) * - nobody refers to it. * - it is not permanent */ - write_lock(&n->lock); - if (atomic_read(&n->refcnt) == 1 && - !(n->nud_state & NUD_PERMANENT)) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); - n->dead = 1; - shrunk = 1; - write_unlock(&n->lock); - neigh_cleanup_and_release(n); + if (neigh_del(n, NUD_PERMANENT, np, tbl)) { + shrunk = 1; continue; } - write_unlock(&n->lock); np = &n->next; } } @@ -219,7 +254,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) neigh_del_timer(n); n->dead = 1; - if (atomic_read(&n->refcnt) != 1) { + if (refcount_read(&n->refcnt) != 1) { /* The most unpleasant situation. 
We must destroy neighbour entry, but someone still uses it. @@ -300,7 +335,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; - atomic_set(&n->refcnt, 1); + refcount_set(&n->refcnt, 1); n->dead = 1; out: return n; @@ -409,7 +444,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, rcu_read_lock_bh(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { - if (!atomic_inc_not_zero(&n->refcnt)) + if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); } @@ -438,7 +473,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, n = rcu_dereference_bh(n->next)) { if (!memcmp(n->primary_key, pkey, key_len) && net_eq(dev_net(n->dev), net)) { - if (!atomic_inc_not_zero(&n->refcnt)) + if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -674,7 +709,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms); static inline void neigh_parms_put(struct neigh_parms *parms) { - if (atomic_dec_and_test(&parms->refcnt)) + if (refcount_dec_and_test(&parms->refcnt)) neigh_parms_destroy(parms); } @@ -786,7 +821,7 @@ static void neigh_periodic_work(struct work_struct *work) if (time_before(n->used, n->confirmed)) n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && + if (refcount_read(&n->refcnt) == 1 && (state == NUD_FAILED || time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { *np = n->next; @@ -1444,7 +1479,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; - atomic_set(&p->refcnt, 1); + refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); @@ -1507,7 +1542,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_LIST_HEAD(&tbl->parms_list); list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); - atomic_set(&tbl->parms.refcnt, 1); + refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); @@ -1649,7 +1684,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, NETLINK_CB(skb).portid); + write_lock_bh(&tbl->lock); neigh_release(neigh); + neigh_remove_one(neigh, tbl); + write_unlock_bh(&tbl->lock); out: return err; @@ -1758,7 +1796,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || - nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || + nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ @@ -2196,7 +2234,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ci.ndm_used = jiffies_to_clock_t(now - neigh->used); ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); - ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; + ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 14d09345f00d..4847964931df 100644 --- a/net/core/net-procfs.c +++ 
b/net/core/net-procfs.c @@ -363,15 +363,10 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); + seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", + dev->ifindex, dev->name, + ha->refcount, ha->global_use, + (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 65ea0ff4017c..b4f9922b6f23 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex); static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - int res, orig_len = dev->tx_queue_len; + unsigned int orig_len = dev->tx_queue_len; + int res; + + if (new_len != (unsigned int)new_len) + return -ERANGE; if (new_len != orig_len) { dev->tx_queue_len = new_len; @@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong); +NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { @@ -622,7 +626,7 @@ static struct attribute *netstat_attrs[] = { }; -static struct attribute_group netstat_group = { +static const struct attribute_group netstat_group = { .name = "statistics", .attrs = netstat_attrs, }; @@ -632,7 +636,7 @@ static struct attribute *wireless_attrs[] = { NULL }; -static struct attribute_group wireless_group = { +static const struct attribute_group wireless_group = { .name = "wireless", .attrs = wireless_attrs, }; @@ -1200,7 +1204,7 @@ static struct attribute *dql_attrs[] = { NULL }; -static struct attribute_group dql_group = { +static const struct attribute_group dql_group = { .name = "byte_queue_limits", .attrs = dql_attrs, }; @@ -1444,7 +1448,7 @@ static void *net_grab_current_ns(void) struct net *ns = current->nsproxy->net_ns; #ifdef CONFIG_NET_NS if (ns) - atomic_inc(&ns->passive); + refcount_inc(&ns->passive); #endif return ns; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 26bbfababff2..8726d051f31d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); - atomic_set(&net->passive, 1); + refcount_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); @@ -380,7 +380,7 @@ static void net_free(struct net *net) void net_drop_ns(void *p) { struct net *ns = p; - if (ns && atomic_dec_and_test(&ns->passive)) + if (ns && refcount_dec_and_test(&ns->passive)) net_free(ns); } @@ -501,6 +501,23 @@ static void cleanup_net(struct work_struct *work) net_drop_ns(net); } } + +/** + * net_ns_barrier - wait until concurrent net_cleanup_work is done + * + * cleanup_net runs from work queue and will first remove namespaces + * from the global list, then run net exit functions. + * + * Call this in module exit path to make sure that all netns + * ->exit ops have been invoked before the function is removed. 
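 *
 * Editor's sketch, not part of the patch: a module that registers
 * pernet ops could pair the two calls in its exit path, where
 * foo_net_ops and foo_exit are hypothetical names:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *		net_ns_barrier();	(wait for in-flight ->exit ops)
 *	}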
+ */ +void net_ns_barrier(void) +{ + mutex_lock(&net_mutex); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL(net_ns_barrier); + static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) @@ -596,6 +613,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + struct nlattr *nla; struct net *peer; int nsid, err; @@ -603,23 +621,35 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, rtnl_net_policy, extack); if (err < 0) return err; - if (!tb[NETNSA_NSID]) + if (!tb[NETNSA_NSID]) { + NL_SET_ERR_MSG(extack, "nsid is missing"); return -EINVAL; + } nsid = nla_get_s32(tb[NETNSA_NSID]); - if (tb[NETNSA_PID]) + if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); - else if (tb[NETNSA_FD]) + nla = tb[NETNSA_PID]; + } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); - else + nla = tb[NETNSA_FD]; + } else { + NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; - if (IS_ERR(peer)) + } + if (IS_ERR(peer)) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); + } spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { spin_unlock_bh(&net->nsid_lock); err = -EEXIST; + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, + "Peer netns already has a nsid assigned"); goto out; } @@ -628,6 +658,10 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; + } else if (err == -ENOSPC && nsid >= 0) { + err = -EEXIST; + NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); + NL_SET_ERR_MSG(extack, "The specified nsid is already used"); } out: put_net(peer); @@ -670,6 +704,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + struct nlattr *nla; struct sk_buff *msg; struct net *peer; int err, id; @@ -678,15 +713,22 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, rtnl_net_policy, extack); if (err < 0) return err; - if (tb[NETNSA_PID]) + if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); - else if (tb[NETNSA_FD]) + nla = tb[NETNSA_PID]; + } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); - else + nla = tb[NETNSA_FD]; + } else { + NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; + } - if (IS_ERR(peer)) + if (IS_ERR(peer)) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); + } msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); if (!msg) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 29be2466970c..d3408a693166 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -277,7 +277,7 @@ static void zap_completion_queue(void) struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); @@ -309,7 +309,7 @@ repeat: return NULL; } - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; } @@ -441,7 +441,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) ip6h->saddr = np->local_ip.in6; ip6h->daddr = np->remote_ip.in6; - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + eth = 
skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { @@ -470,7 +470,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) put_unaligned(np->remote_ip.ip, &(iph->daddr)); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IP); } @@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); - atomic_set(&npinfo->refcnt, 1); + refcount_set(&npinfo->refcnt, 1); ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { @@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } } else { npinfo = rtnl_dereference(ndev->npinfo); - atomic_inc(&npinfo->refcnt); + refcount_inc(&npinfo->refcnt); } npinfo->netpoll = np; @@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np) synchronize_srcu(&netpoll_srcu); - if (atomic_dec_and_test(&npinfo->refcnt)) { + if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; ops = np->dev->netdev_ops; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 96947f5d41e4..6e1e10ff433a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2675,7 +2675,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, goto err; } /* restore ll */ - eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); + eth = skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 2 * ETH_ALEN); eth->h_proto = protocol; @@ -2714,11 +2714,11 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, struct timeval timestamp; struct pktgen_hdr *pgh; - pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + pgh = skb_put(skb, sizeof(*pgh)); datalen -= sizeof(*pgh); if (pkt_dev->nfrags <= 0) { - memset(skb_put(skb, datalen), 0, datalen); + skb_put_zero(skb, datalen); } else { int frags = pkt_dev->nfrags; int i, len; @@ -2729,7 +2729,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, frags = MAX_SKB_FRAGS; len = datalen - frags * PAGE_SIZE; if (len > 0) { - memset(skb_put(skb, len), 0, len); + skb_put_zero(skb, len); datalen = frags * PAGE_SIZE; } @@ -2844,34 +2844,35 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ - eth = (__u8 *) skb_push(skb, 14); - mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); + eth = skb_push(skb, 14); + mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); if (pkt_dev->vlan_id != 0xffff) { if (pkt_dev->svlan_id != 0xffff) { - svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_tci = skb_put(skb, sizeof(__be16)); *svlan_tci = build_tci(pkt_dev->svlan_id, pkt_dev->svlan_cfi, pkt_dev->svlan_p); - svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_encapsulated_proto = skb_put(skb, + sizeof(__be16)); *svlan_encapsulated_proto = htons(ETH_P_8021Q); } - vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_tci = skb_put(skb, sizeof(__be16)); *vlan_tci = build_tci(pkt_dev->vlan_id, pkt_dev->vlan_cfi, pkt_dev->vlan_p); - vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_encapsulated_proto = skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = htons(ETH_P_IP); } skb_reset_mac_header(skb); skb_set_network_header(skb, 
skb->len); - iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + iph = skb_put(skb, sizeof(struct iphdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; @@ -2971,34 +2972,35 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ - eth = (__u8 *) skb_push(skb, 14); - mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); + eth = skb_push(skb, 14); + mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); if (pkt_dev->vlan_id != 0xffff) { if (pkt_dev->svlan_id != 0xffff) { - svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_tci = skb_put(skb, sizeof(__be16)); *svlan_tci = build_tci(pkt_dev->svlan_id, pkt_dev->svlan_cfi, pkt_dev->svlan_p); - svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_encapsulated_proto = skb_put(skb, + sizeof(__be16)); *svlan_encapsulated_proto = htons(ETH_P_8021Q); } - vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_tci = skb_put(skb, sizeof(__be16)); *vlan_tci = build_tci(pkt_dev->vlan_id, pkt_dev->vlan_cfi, pkt_dev->vlan_p); - vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_encapsulated_proto = skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = htons(ETH_P_IPV6); } skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); - iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + iph = skb_put(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; @@ -3361,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_get(); - while (atomic_read(&(pkt_dev->skb->users)) != 1) { + while (refcount_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) break; @@ -3418,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { skb = pkt_dev->skb; skb->protocol = eth_type_trans(skb, skb->dev); - atomic_add(burst, &skb->users); + refcount_add(burst, &skb->users); local_bh_disable(); do { ret = netif_receive_skb(skb); @@ -3426,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->errors++; pkt_dev->sofar++; pkt_dev->seq_num++; - if (atomic_read(&skb->users) != burst) { + if (refcount_read(&skb->users) != burst) { /* skb was queued by rps/rfs or taps, * so cannot reuse this skb */ - atomic_sub(burst - 1, &skb->users); + WARN_ON(refcount_sub_and_test(burst - 1, &skb->users)); /* get out of the loop and wait * until skb is consumed */ @@ -3444,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { local_bh_disable(); - atomic_inc(&pkt_dev->skb->users); + refcount_inc(&pkt_dev->skb->users); ret = dev_queue_xmit(pkt_dev->skb); switch (ret) { @@ -3485,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - atomic_add(burst, &pkt_dev->skb->users); + refcount_add(burst, &pkt_dev->skb->users); xmit_more: ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); @@ -3511,11 +3513,11 @@ xmit_more: 
/* fallthru */ case NETDEV_TX_BUSY: /* Retry it next time */ - atomic_dec(&(pkt_dev->skb->users)); + refcount_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; } if (unlikely(burst)) - atomic_sub(burst, &pkt_dev->skb->users); + WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users)); unlock: HARD_TX_UNLOCK(odev, txq); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 467a2f4510a7..d1ba90980be1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -16,6 +16,7 @@ * Vitaly E. Lavrov RTA_OK arithmetics was wrong. */ +#include <linux/bitops.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/types.h> @@ -39,6 +40,7 @@ #include <linux/if_vlan.h> #include <linux/pci.h> #include <linux/etherdevice.h> +#include <linux/bpf.h> #include <linux/uaccess.h> @@ -647,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g NETLINK_CB(skb).dst_group = group; if (echo) - atomic_inc(&skb->users); + refcount_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); if (echo) err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); @@ -899,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ - nla_total_size(1); /* XDP_ATTACHED */ + nla_total_size(1) + /* XDP_ATTACHED */ + nla_total_size(4); /* XDP_PROG_ID */ return xdp_size; } @@ -942,6 +945,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1248,23 +1252,29 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } -static u8 rtnl_xdp_attached_mode(struct net_device *dev) +static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) { const struct net_device_ops *ops = dev->netdev_ops; + const struct bpf_prog *generic_xdp_prog; ASSERT_RTNL(); - if (rcu_access_pointer(dev->xdp_prog)) + *prog_id = 0; + generic_xdp_prog = rtnl_dereference(dev->xdp_prog); + if (generic_xdp_prog) { + *prog_id = generic_xdp_prog->aux->id; return XDP_ATTACHED_SKB; - if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp)) - return XDP_ATTACHED_DRV; + } + if (!ops->ndo_xdp) + return XDP_ATTACHED_NONE; - return XDP_ATTACHED_NONE; + return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id); } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; + u32 prog_id; int err; xdp = nla_nest_start(skb, IFLA_XDP); @@ -1272,10 +1282,16 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) return -EMSGSIZE; err = nla_put_u8(skb, IFLA_XDP_ATTACHED, - rtnl_xdp_attached_mode(dev)); + rtnl_xdp_attached_mode(dev, &prog_id)); if (err) goto err_cancel; + if (prog_id) { + err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id); + if (err) + goto err_cancel; + } + nla_nest_end(skb, xdp); return 0; @@ -1284,9 +1300,40 @@ err_cancel: return err; } +static u32 rtnl_get_event(unsigned long event) +{ + u32 rtnl_event_type = IFLA_EVENT_NONE; + + switch (event) { + case NETDEV_REBOOT: + rtnl_event_type = IFLA_EVENT_REBOOT; + break; + case NETDEV_FEAT_CHANGE: + rtnl_event_type = IFLA_EVENT_FEATURES; + break; + case NETDEV_BONDING_FAILOVER: + rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER; + break; + case NETDEV_NOTIFY_PEERS: + rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS; 
+ break; + case NETDEV_RESEND_IGMP: + rtnl_event_type = IFLA_EVENT_IGMP_RESEND; + break; + case NETDEV_CHANGEINFODATA: + rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS; + break; + default: + break; + } + + return rtnl_event_type; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags, u32 ext_filter_mask) + unsigned int flags, u32 ext_filter_mask, + u32 event) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1335,6 +1382,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; + if (event != IFLA_EVENT_NONE) { + if (nla_put_u32(skb, IFLA_EVENT, event)) + goto nla_put_failure; + } + if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure; @@ -1469,6 +1521,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, [IFLA_XDP] = { .type = NLA_NESTED }, + [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, }; @@ -1517,6 +1570,7 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { [IFLA_XDP_FD] = { .type = NLA_S32 }, [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, [IFLA_XDP_FLAGS] = { .type = NLA_U32 }, + [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) @@ -1629,7 +1683,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask); + ext_filter_mask, 0); if (err < 0) { if (likely(skb->len)) @@ -2051,8 +2105,8 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_TXQLEN]) { - unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - unsigned long orig_len = dev->tx_queue_len; + unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); + unsigned int orig_len = dev->tx_queue_len; if (dev->tx_queue_len ^ value) { dev->tx_queue_len = value; @@ -2189,7 +2243,7 @@ static int do_setlink(const struct sk_buff *skb, if (err < 0) goto errout; - if (xdp[IFLA_XDP_ATTACHED]) { + if (xdp[IFLA_XDP_ATTACHED] || xdp[IFLA_XDP_PROG_ID]) { err = -EINVAL; goto errout; } @@ -2200,8 +2254,7 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - if ((xdp_flags & XDP_FLAGS_SKB_MODE) && - (xdp_flags & XDP_FLAGS_DRV_MODE)) { + if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1) { err = -EINVAL; goto errout; } @@ -2529,7 +2582,7 @@ replay: data = attr; } if (ops->validate) { - err = ops->validate(tb, data); + err = ops->validate(tb, data, extack); if (err < 0) return err; } @@ -2548,7 +2601,8 @@ replay: slave_data = slave_attr; } if (m_ops->slave_validate) { - err = m_ops->slave_validate(tb, slave_data); + err = m_ops->slave_validate(tb, slave_data, + extack); if (err < 0) return err; } @@ -2567,7 +2621,7 @@ replay: !ops->changelink) return -EOPNOTSUPP; - err = ops->changelink(dev, tb, data); + err = ops->changelink(dev, tb, data, extack); if (err < 0) return err; status |= DO_SETLINK_NOTIFY; @@ -2578,7 +2632,8 @@ replay: return -EOPNOTSUPP; err = m_ops->slave_changelink(master_dev, dev, - tb, slave_data); + tb, slave_data, + extack); if (err < 0) return err; status |= DO_SETLINK_NOTIFY; @@ -2652,7 +2707,8 @@ replay: dev->ifindex = ifm->ifi_index; if (ops->newlink) { - err = ops->newlink(link_net ? : net, dev, tb, data); + err = ops->newlink(link_net ? 
: net, dev, tb, data, + extack); /* Drivers should call free_netdev() in ->destructor * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. @@ -2739,7 +2795,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2811,7 +2867,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned int change, gfp_t flags) + unsigned int change, + u32 event, gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2822,7 +2879,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2843,18 +2900,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) +static void rtmsg_ifinfo_event(int type, struct net_device *dev, + unsigned int change, u32 event, + gfp_t flags) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } + +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); +} EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -4159,6 +4223,18 @@ static void rtnetlink_rcv(struct sk_buff *skb) rtnl_unlock(); } +static int rtnetlink_bind(struct net *net, int group) +{ + switch (group) { + case RTNLGRP_IPV4_MROUTE_R: + case RTNLGRP_IPV6_MROUTE_R: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + break; + } + return 0; +} + static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -4171,7 +4247,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); + rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), + GFP_KERNEL); break; default: break; @@ -4192,6 +4269,7 @@ static int __net_init rtnetlink_net_init(struct net *net) .input = rtnetlink_rcv, .cb_mutex = &rtnl_mutex, .flags = NL_CFG_F_NONROOT_RECV, + .bind = rtnetlink_bind, }; sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index ae35cce3a40d..7232274de334 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -51,7 +51,8 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const struct net *net, + const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -61,7 +62,7 
@@ u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) .daddr = *(struct in6_addr *)daddr, }; - if (sysctl_tcp_timestamps != 1) + if (net->ipv4.sysctl_tcp_timestamps != 1) return 0; ts_secret_init(); @@ -113,9 +114,9 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (sysctl_tcp_timestamps != 1) + if (net->ipv4.sysctl_tcp_timestamps != 1) return 0; ts_secret_init(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1be7c01efe2..8b11341ed69a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->head = NULL; skb->truesize = sizeof(struct sk_buff); - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->mac_header = (typeof(skb->mac_header))~0U; out: @@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); skb->pfmemalloc = pfmemalloc; - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); @@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; - atomic_set(&fclones->fclone_ref, 1); + refcount_set(&fclones->fclone_ref, 1); fclones->skb2.fclone = SKB_FCLONE_CLONE; } @@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); @@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb) * This test would have no chance to be true for the clone, * while here, branch prediction will be good. 
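 *
 * (Editor's note: refcount_read() == 1 means the caller holds the only
 *  reference, so the fclone pair can be freed directly here without
 *  taking the more expensive dec-and-test path below.)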
*/ - if (atomic_read(&fclones->fclone_ref) == 1) + if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; @@ -637,18 +637,16 @@ static void kfree_skbmem(struct sk_buff *skb) fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } - if (!atomic_dec_and_test(&fclones->fclone_ref)) + if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(skbuff_fclone_cache, fclones); } -static void skb_release_head_state(struct sk_buff *skb) +void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif + secpath_reset(skb); if (skb->destructor) { WARN_ON(in_irq()); skb->destructor(skb); @@ -694,12 +692,9 @@ EXPORT_SYMBOL(__kfree_skb); */ void kfree_skb(struct sk_buff *skb) { - if (unlikely(!skb)) - return; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } @@ -746,17 +741,32 @@ EXPORT_SYMBOL(skb_tx_error); */ void consume_skb(struct sk_buff *skb) { - if (unlikely(!skb)) - return; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); +/** + * consume_stateless_skb - free an skbuff, assuming it is stateless + * @skb: buffer to free + * + * Works like consume_skb(), but this variant assumes that all the head + * states have been already dropped. + */ +void consume_stateless_skb(struct sk_buff *skb) +{ + if (!skb_unref(skb)) + return; + + trace_consume_skb(skb); + if (likely(skb->head)) + skb_release_data(skb); + kfree_skbmem(skb); +} + void __kfree_skb_flush(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -807,10 +817,9 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + if (!skb_unref(skb)) return; + /* if reaching here SKB is ready to free */ trace_consume_skb(skb); @@ -906,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head_frag); C(data); C(truesize); - atomic_set(&n->users, 1); + refcount_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; @@ -1018,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && - atomic_read(&fclones->fclone_ref) == 1) { + refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; - atomic_set(&fclones->fclone_ref, 2); + refcount_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; @@ -1412,7 +1421,7 @@ EXPORT_SYMBOL(skb_pad); * returned. */ -unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; @@ -1431,9 +1440,9 @@ EXPORT_SYMBOL_GPL(pskb_put); * exceed the total buffer size the kernel will panic. A pointer to the * first byte of the extra data is returned. */ -unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +void *skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp = skb_tail_pointer(skb); + void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; @@ -1452,7 +1461,7 @@ EXPORT_SYMBOL(skb_put); * start. 
If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ -unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; @@ -1472,7 +1481,7 @@ EXPORT_SYMBOL(skb_push); * is returned. Once the data has been pulled future pushes will overwrite * the old data. */ -unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) +void *skb_pull(struct sk_buff *skb, unsigned int len) { return skb_pull_inline(skb, len); } @@ -1607,7 +1616,7 @@ EXPORT_SYMBOL(___pskb_trim); * * It is pretty complicated. Luckily, it is called only in exceptional cases. */ -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) +void *__pskb_pull_tail(struct sk_buff *skb, int delta) { /* If skb has not enough free space at tail, get new one * plus 128 bytes for future expansions. If we have enough @@ -2243,6 +2252,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); +static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) +{ + net_warn_ratelimited( + "%s: attempt to compute crc32c without libcrc32c.ko\n", + __func__); + return 0; +} + +static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, + int offset, int len) +{ + net_warn_ratelimited( + "%s: attempt to compute crc32c without libcrc32c.ko\n", + __func__); + return 0; +} + +static const struct skb_checksum_ops default_crc32c_ops = { + .update = warn_crc32c_csum_update, + .combine = warn_crc32c_csum_combine, +}; + +const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = + &default_crc32c_ops; +EXPORT_SYMBOL(crc32c_csum_stub); + /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer @@ -2620,7 +2655,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & + SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2988,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, get_page(pfrag->page); skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; @@ -3029,7 +3065,7 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. 
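 *
 * (Editor's aside, illustrative only: with skb_push(), skb_put() and
 *  friends now returning void *, call sites can drop their casts, e.g.
 *
 *	struct ethhdr *eth = skb_push(skb, ETH_HLEN);
 *
 *  replaces eth = (struct ethhdr *)skb_push(skb, ETH_HLEN), exactly as
 *  the netpoll and pktgen hunks above do.)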
*/ -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) +void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { unsigned char *data = skb->data; @@ -3235,8 +3271,8 @@ normal: skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3482,24 +3518,18 @@ void __init skb_init(void) NULL); } -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. - */ static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, + unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; + if (unlikely(recursion_level >= 24)) + return -EMSGSIZE; + if (copy > 0) { if (copy > len) copy = len; @@ -3518,6 +3548,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; if (copy > len) copy = len; @@ -3532,16 +3564,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) } skb_walk_frags(skb, frag_iter) { - int end; + int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; + if (copy > len) copy = len; - elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, - copy); + ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, + copy, recursion_level + 1); + if (unlikely(ret < 0)) + return ret; + elt += ret; if ((len -= copy) == 0) return elt; offset += copy; @@ -3552,6 +3590,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/** + * skb_to_sgvec - Fill a scatter-gather list from a socket buffer + * @skb: Socket buffer containing the buffers to be mapped + * @sg: The scatter-gather list to map into + * @offset: The offset into the buffer's contents to start mapping + * @len: Length of buffer space to be mapped + * + * Fill the specified scatter-gather list with mappings/pointers into a + * region of the buffer space attached to a socket buffer. Returns either + * the number of scatterlist items used, or -EMSGSIZE if the contents + * could not fit. + */ +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); + + if (nsg <= 0) + return nsg; + + sg_mark_end(&sg[nsg - 1]); + + return nsg; +} +EXPORT_SYMBOL_GPL(skb_to_sgvec); + /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given * sglist without mark the sg which contain last skb data as the end. 
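 *
 * (Editor's aside, illustrative only: since skb_to_sgvec() can now
 *  fail with -EMSGSIZE, callers are expected to check the result, e.g.
 *
 *	int nsg = skb_to_sgvec(skb, sg, 0, skb->len);
 *	if (nsg < 0)
 *		return nsg;
 *
 *  where skb and sg are assumed to be set up by the caller.)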
* So the caller can mannipulate sg list as will when padding new data after @@ -3574,19 +3637,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { - return __skb_to_sgvec(skb, sg, offset, len); + return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} -EXPORT_SYMBOL_GPL(skb_to_sgvec); /** * skb_cow_data - Check that a socket buffer's data buffers are writable @@ -3789,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb) struct sock *sk = skb->sk; struct sk_buff *clone; - if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); @@ -3860,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); @@ -3878,6 +3933,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; + if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && + skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) + return; + tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; @@ -3899,7 +3958,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, return; if (tsonly) { - skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & + SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } @@ -3937,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. 
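 *
 * (Editor's note: refcount_inc_not_zero() keeps the old
 *  atomic_inc_not_zero() semantics, failing once the count has already
 *  reached zero, while the refcount_t API additionally saturates and
 *  warns on over- and underflow instead of silently wrapping.)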
*/ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index 0c3fc16223f9..ac2a404c73eb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1038,6 +1038,10 @@ set_rcvbuf: #endif case SO_MAX_PACING_RATE: + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); @@ -1074,6 +1078,18 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, } } +static int groups_to_user(gid_t __user *dst, const struct group_info *src) +{ + struct user_namespace *user_ns = current_user_ns(); + int i; + + for (i = 0; i < src->ngroups; i++) + if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) + return -EFAULT; + + return 0; +} + int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -1227,6 +1243,27 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + case SO_PEERGROUPS: + { + int ret, n; + + if (!sk->sk_peer_cred) + return -ENODATA; + + n = sk->sk_peer_cred->group_info->ngroups; + if (len < n * sizeof(gid_t)) { + len = n * sizeof(gid_t); + return put_user(len, optlen) ? -EFAULT : -ERANGE; + } + len = n * sizeof(gid_t); + + ret = groups_to_user((gid_t __user *)optval, + sk->sk_peer_cred->group_info); + if (ret) + return ret; + goto lenout; + } + case SO_PEERNAME: { char address[128]; @@ -1491,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); @@ -1515,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_destruct(sk); filter = rcu_dereference_check(sk->sk_filter, - atomic_read(&sk->sk_wmem_alloc) == 0); + refcount_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); @@ -1565,7 +1602,7 @@ void sk_free(struct sock *sk) * some packets are still in some tx queue. 
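 *
 * (Editor's note: because sk_wmem_alloc stays biased by one while the
 *  socket is alive, the conversions nearby use
 *  WARN_ON(refcount_sub_and_test(...)) where the subtraction must
 *  never bring the count to zero; an unexpected drop now warns loudly
 *  instead of silently freeing the socket.)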
* If not null, sock_wfree() will call __sk_free(sk) later */ - if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + if (refcount_dec_and_test(&sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sk_free); @@ -1622,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ - atomic_set(&newsk->sk_wmem_alloc, 1); + refcount_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); @@ -1671,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&newsk->sk_refcnt, 2); + refcount_set(&newsk->sk_refcnt, 2); /* * Increment the counter in the same struct proto as the master @@ -1750,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb) * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call */ - atomic_sub(len - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); sk->sk_write_space(sk); len = 1; } @@ -1758,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb) * if sk_wmem_alloc reaches 0, we must finish what sk_free() * could not do because of in-flight packets */ - if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1770,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) __sk_free(sk); } @@ -1792,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) * is enough to guarantee sk_free() wont free this sock until * all in-flight packets are completed */ - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(skb_set_owner_w); @@ -1814,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb) ) { struct sock *sk = skb->sk; - if (atomic_inc_not_zero(&sk->sk_refcnt)) { - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); skb->destructor = sock_efree; } } else { @@ -1875,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); @@ -1950,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2072,6 +2109,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, } EXPORT_SYMBOL(sock_cmsg_send); +static void sk_enter_memory_pressure(struct sock *sk) +{ + if (!sk->sk_prot->enter_memory_pressure) + return; + + sk->sk_prot->enter_memory_pressure(sk); +} + +static void sk_leave_memory_pressure(struct sock *sk) +{ + if (sk->sk_prot->leave_memory_pressure) { + sk->sk_prot->leave_memory_pressure(sk); + } else { + unsigned long *memory_pressure = sk->sk_prot->memory_pressure; + + if (memory_pressure && 
*memory_pressure) + *memory_pressure = 0; + } +} + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) @@ -2253,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) return 1; - } else if (atomic_read(&sk->sk_wmem_alloc) < + } else if (refcount_read(&sk->sk_wmem_alloc) < prot->sysctl_wmem[0]) return 1; } @@ -2520,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | @@ -2630,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&sk->sk_refcnt, 1); + refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 93106120f987..733f523707ac 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -178,10 +178,6 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)}, }; -static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { - [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, -}; - /* DCB number of traffic classes nested attributes. */ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = { [DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG}, @@ -1463,8 +1459,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) continue; + + if (nla_len(attr) < sizeof(struct dcb_app)) { + err = -ERANGE; + goto err; + } + app_data = nla_data(attr); if (ops->ieee_setapp) err = ops->ieee_setapp(netdev, app_data); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 5e3a7302f774..e1295d5f2c56 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -233,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - const u32 now = ccid2_time_stamp; + const u32 now = ccid2_jiffies32; struct ccid2_seq *next; /* slow-start after idle periods (RFC 2581, RFC 2861) */ @@ -466,7 +466,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, * The cleanest solution is to not use the ccid2s_sent field at all * and instead use DCCP timestamps: requires changes in other places. */ - ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent); + ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) @@ -478,7 +478,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) return; } - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = ccid2_jiffies32; hc->tx_cwnd = hc->tx_cwnd / 2 ? 
: 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); @@ -731,7 +731,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 18c97543e522..6e50ef2898fb 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -27,7 +27,7 @@ * CCID-2 timestamping faces the same issues as TCP timestamping. * Hence we reuse/share as much of the code as possible. */ -#define ccid2_time_stamp tcp_time_stamp +#define ccid2_jiffies32 ((u32)jiffies) /* NUMDUPACK parameter from RFC 4341, p. 6 */ #define NUMDUPACK 3 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f75482bdee9a..f85d901f4e3f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1033,33 +1033,34 @@ static int __init dccp_v4_init(void) { int err = proto_register(&dccp_v4_prot, 1); - if (err != 0) + if (err) goto out; - err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); - if (err != 0) - goto out_proto_unregister; - inet_register_protosw(&dccp_v4_protosw); err = register_pernet_subsys(&dccp_v4_ops); if (err) goto out_destroy_ctl_sock; + + err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); + if (err) + goto out_proto_unregister; + out: return err; +out_proto_unregister: + unregister_pernet_subsys(&dccp_v4_ops); out_destroy_ctl_sock: inet_unregister_protosw(&dccp_v4_protosw); - inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); -out_proto_unregister: proto_unregister(&dccp_v4_prot); goto out; } static void __exit dccp_v4_exit(void) { + inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); unregister_pernet_subsys(&dccp_v4_ops); inet_unregister_protosw(&dccp_v4_protosw); - inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); proto_unregister(&dccp_v4_prot); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 992621172220..c376af5bfdfb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } ireq->ir_iif = sk->sk_bound_dev_if; @@ -1098,33 +1098,33 @@ static int __init dccp_v6_init(void) { int err = proto_register(&dccp_v6_prot, 1); - if (err != 0) + if (err) goto out; - err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); - if (err != 0) - goto out_unregister_proto; - inet6_register_protosw(&dccp_v6_protosw); err = register_pernet_subsys(&dccp_v6_ops); - if (err != 0) + if (err) goto out_destroy_ctl_sock; + + err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); + if (err) + goto out_unregister_proto; + out: return err; - +out_unregister_proto: + unregister_pernet_subsys(&dccp_v6_ops); out_destroy_ctl_sock: - inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); inet6_unregister_protosw(&dccp_v6_protosw); -out_unregister_proto: proto_unregister(&dccp_v6_prot); goto out; } static void __exit dccp_v6_exit(void) { - unregister_pernet_subsys(&dccp_v6_ops); inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); + unregister_pernet_subsys(&dccp_v6_ops); inet6_unregister_protosw(&dccp_v6_protosw); proto_unregister(&dccp_v6_prot); } diff --git 
a/net/dccp/options.c b/net/dccp/options.c index 74d29c56c367..51cdfc3bd8ca 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -484,7 +484,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb) return -1; DCCP_SKB_CB(skb)->dccpd_opt_len++; - *skb_push(skb, 1) = DCCPO_MANDATORY; + *(u8 *)skb_push(skb, 1) = DCCPO_MANDATORY; return 0; } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 405483a07efc..73a0399dc7a2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -447,7 +447,7 @@ static void dn_destruct(struct sock *sk) dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); } -static int dn_memory_pressure; +static unsigned long dn_memory_pressure; static void dn_enter_memory_pressure(struct sock *sk) { diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 9017a9a73ab5..fa0110b57ca1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -846,7 +846,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) skb->dev = dev; - msg = (struct endnode_hello_message *)skb_put(skb,sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); msg->msgflg = 0x0D; memcpy(msg->tiver, dn_eco_version, 3); @@ -867,7 +867,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) msg->datalen = 0x02; memset(msg->data, 0xAA, 2); - pktlen = (__le16 *)skb_push(skb,2); + pktlen = skb_push(skb, 2); *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -959,7 +959,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) skb_trim(skb, (27 + *i2)); - pktlen = (__le16 *)skb_push(skb, 2); + pktlen = skb_push(skb, 2); *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index f9058ebeb635..f9f6fb3f3c5b 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -389,7 +389,7 @@ link_it: } fi->fib_treeref++; - atomic_inc(&fi->fib_clntref); + refcount_set(&fi->fib_clntref, 1); spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; fi->fib_prev = NULL; @@ -425,7 +425,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn switch (type) { case RTN_NAT: DN_FIB_RES_RESET(*res); - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); return 0; case RTN_UNICAST: case RTN_LOCAL: @@ -438,7 +438,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn } if (nhsel < fi->fib_nhs) { res->nh_sel = nhsel; - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); return 0; } endfor_nexthops(fi); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index eeb5fc561f80..21dedf6fd0f7 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -559,7 +559,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq, (dn->flags&DN_NDFLAG_R2) ? "2" : "-", (dn->flags&DN_NDFLAG_P3) ? "3" : "-", dn->n.nud_state, - atomic_read(&dn->n.refcnt), + refcount_read(&dn->n.refcnt), dn->blksize, (dn->n.dev) ? 
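
The dn_fib hunks above belong to the same refcount_t conversion as the sock.c and skb->users changes earlier in this diff. Two details are idiomatic here: a refcount_t saturates and WARNs on overflow, underflow and increment-from-zero instead of silently wrapping, and that last property is why dn_fib_create_info() switches from atomic_inc() on a zero-initialized counter to refcount_set(&fi->fib_clntref, 1). The canonical get/put shape, on an illustrative object:

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_obj {
        refcount_t refcnt;
        /* payload ... */
};

static struct demo_obj *demo_alloc(void)
{
        struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (obj)
                refcount_set(&obj->refcnt, 1);  /* never "inc from zero" */
        return obj;
}

static void demo_get(struct demo_obj *obj)
{
        refcount_inc(&obj->refcnt);     /* saturates and WARNs on misuse */
}

static void demo_put(struct demo_obj *obj)
{
        if (refcount_dec_and_test(&obj->refcnt))
                kfree(obj);
}
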
dn->n.dev->name : "?"); read_unlock(&n->lock); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 849805e7af52..66f035e476ea 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -484,7 +484,7 @@ void dn_send_conn_ack (struct sock *sk) if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL) return; - msg = (struct nsp_conn_ack_msg *)skb_put(skb, 3); + msg = skb_put(skb, 3); msg->msgflg = 0x24; msg->dstaddr = scp->addrrem; @@ -522,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) return; - msg = (struct nsp_conn_init_msg *)skb_put(skb, sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); msg->msgflg = 0x28; msg->dstaddr = scp->addrrem; msg->srcaddr = scp->addrloc; @@ -530,10 +530,10 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) msg->info = scp->info_loc; msg->segsize = cpu_to_le16(scp->segsize_loc); - *skb_put(skb,1) = len; + skb_put_u8(skb, len); if (len > 0) - memcpy(skb_put(skb, len), scp->conndata_out.opt_data, len); + skb_put_data(skb, scp->conndata_out.opt_data, len); dn_nsp_send(skb); @@ -662,7 +662,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) return; cb = DN_SKB_CB(skb); - msg = (struct nsp_conn_init_msg *)skb_put(skb,sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); msg->msgflg = msgflg; msg->dstaddr = 0x0000; /* Remote Node will assign it*/ @@ -686,27 +686,27 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) if (scp->peer.sdn_flags & SDF_UICPROXY) menuver |= DN_MENUVER_UIC; - *skb_put(skb, 1) = menuver; /* Menu Version */ + skb_put_u8(skb, menuver); /* Menu Version */ aux = scp->accessdata.acc_userl; - *skb_put(skb, 1) = aux; + skb_put_u8(skb, aux); if (aux > 0) - memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); + skb_put_data(skb, scp->accessdata.acc_user, aux); aux = scp->accessdata.acc_passl; - *skb_put(skb, 1) = aux; + skb_put_u8(skb, aux); if (aux > 0) - memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); + skb_put_data(skb, scp->accessdata.acc_pass, aux); aux = scp->accessdata.acc_accl; - *skb_put(skb, 1) = aux; + skb_put_u8(skb, aux); if (aux > 0) - memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); + skb_put_data(skb, scp->accessdata.acc_acc, aux); aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); - *skb_put(skb, 1) = aux; + skb_put_u8(skb, aux); if (aux > 0) - memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux); + skb_put_data(skb, scp->conndata_out.opt_data, aux); scp->persist = dn_nsp_persist(sk); scp->persist_fxn = dn_nsp_retrans_conninit; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 6f95612b4d32..bcbe548f8854 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -183,11 +183,6 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } -static inline void dnrt_free(struct dn_route *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - static void dn_dst_check_expire(unsigned long dummy) { int i; @@ -202,14 +197,15 @@ static void dn_dst_check_expire(unsigned long dummy) spin_lock(&dn_rt_hash_table[i].lock); while ((rt = rcu_dereference_protected(*rtp, lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) || - (now - rt->dst.lastuse) < expire) { + if (atomic_read(&rt->dst.__refcnt) > 1 || + (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; continue; } *rtp = rt->dst.dn_next; rt->dst.dn_next = NULL; - dnrt_free(rt); + dst_dev_put(&rt->dst); + 
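
The dn_nsp_out conversions above adopt this cycle's sk_buff helper cleanups wholesale: skb_put() and skb_push() now return void * so the per-call casts disappear, single bytes go through skb_put_u8(), and the memcpy(skb_put(skb, len), src, len) idiom collapses into skb_put_data(). Roughly, for a hypothetical option buffer:

#include <linux/skbuff.h>

/* opt/len stand in for whatever variable-length payload is appended */
static void demo_append(struct sk_buff *skb, const u8 *opt, u8 len)
{
        __le16 *pktlen;

        skb_put_u8(skb, len);                   /* was: *skb_put(skb, 1) = len */
        if (len > 0)
                skb_put_data(skb, opt, len);    /* was: memcpy(skb_put(...)) */

        pktlen = skb_push(skb, 2);              /* void * return: no cast */
        *pktlen = cpu_to_le16(skb->len - 2);
}
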
dst_release(&rt->dst); } spin_unlock(&dn_rt_hash_table[i].lock); @@ -235,14 +231,15 @@ static int dn_dst_gc(struct dst_ops *ops) while ((rt = rcu_dereference_protected(*rtp, lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) || - (now - rt->dst.lastuse) < expire) { + if (atomic_read(&rt->dst.__refcnt) > 1 || + (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; continue; } *rtp = rt->dst.dn_next; rt->dst.dn_next = NULL; - dnrt_free(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); break; } spin_unlock_bh(&dn_rt_hash_table[i].lock); @@ -344,7 +341,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); *rp = rth; return 0; } @@ -374,7 +371,8 @@ static void dn_run_flush(unsigned long dummy) for(; rt; rt = next) { next = rcu_dereference_raw(rt->dst.dn_next); RCU_INIT_POINTER(rt->dst.dn_next, NULL); - dnrt_free(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); } nothing_to_declare: @@ -1215,6 +1213,7 @@ make_route: goto e_neighbour; hash = dn_hash(rt->fld.saddr, rt->fld.daddr); + /* dn_insert_route() increments dst->__refcnt */ dn_insert_route(rt, hash, (struct dn_route **)pprt); done: @@ -1237,7 +1236,7 @@ e_nobufs: err = -ENOBUFS; goto done; e_neighbour: - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); goto e_nobufs; } @@ -1445,7 +1444,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST); + rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; @@ -1491,6 +1490,7 @@ make_route: goto e_neighbour; hash = dn_hash(rt->fld.saddr, rt->fld.daddr); + /* dn_insert_route() increments dst->__refcnt */ dn_insert_route(rt, hash, &rt); skb_dst_set(skb, &rt->dst); @@ -1514,7 +1514,7 @@ e_nobufs: goto done; e_neighbour: - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); goto done; } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 81a0868edb1d..cc5f8f971689 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -25,16 +25,19 @@ config NET_DSA_TAG_DSA config NET_DSA_TAG_EDSA bool -config NET_DSA_TAG_TRAILER +config NET_DSA_TAG_KSZ bool -config NET_DSA_TAG_QCA +config NET_DSA_TAG_LAN9303 bool config NET_DSA_TAG_MTK bool -config NET_DSA_TAG_LAN9303 +config NET_DSA_TAG_TRAILER + bool + +config NET_DSA_TAG_QCA bool endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 0b747d75e65a..fcce25da937c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,12 +1,13 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o +dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o -dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o -dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o -dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o +dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 90038d45a547..416ac4ef9ba9 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -24,7 +24,7 @@ #include 
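
The dn_route changes above track this cycle's removal of DST garbage collection: a cached route is now created already holding one reference (the dst_alloc() calls pass an initial refcount of 1), so the expiry walkers treat __refcnt > 1, not != 0, as "still in use", and eviction becomes an explicit dst_dev_put() plus dst_release() pair instead of the RCU-deferred dnrt_free(); entries that never reached the cache go through dst_release_immediate(). A condensed sketch of the new eviction rule:

#include <linux/jiffies.h>
#include <net/dst.h>

/* sketch only: 'expire' and the caller's hash-bucket locking are assumed,
 * as in the decnet walkers above */
static void demo_try_evict(struct dst_entry *dst, unsigned long expire)
{
        /* the cache itself owns one reference, so 1 means "idle" */
        if (atomic_read(&dst->__refcnt) > 1 ||
            (jiffies - dst->lastuse) < expire)
                return;

        dst_dev_put(dst);       /* detach device state right away */
        dst_release(dst);       /* drop the reference the cache held */
}
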
<linux/phy_fixed.h> #include <linux/gpio/consumer.h> #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, @@ -40,26 +40,29 @@ static const struct dsa_device_ops none_ops = { }; const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { +#ifdef CONFIG_NET_DSA_TAG_BRCM + [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#endif #ifdef CONFIG_NET_DSA_TAG_DSA [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, #endif #ifdef CONFIG_NET_DSA_TAG_EDSA [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_BRCM - [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_KSZ + [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_QCA - [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_LAN9303 + [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, #endif #ifdef CONFIG_NET_DSA_TAG_MTK [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, #endif -#ifdef CONFIG_NET_DSA_TAG_LAN9303 - [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; @@ -109,23 +112,22 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds) +int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) { + struct dsa_switch *ds = cpu_dp->ds; struct net_device *master; struct ethtool_ops *cpu_ops; - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; + master = cpu_dp->netdev; cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL); if (!cpu_ops) return -ENOMEM; - memcpy(&ds->dst->master_ethtool_ops, master->ethtool_ops, + memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops, sizeof(struct ethtool_ops)); - ds->dst->master_orig_ethtool_ops = master->ethtool_ops; - memcpy(cpu_ops, &ds->dst->master_ethtool_ops, + cpu_dp->orig_ethtool_ops = master->ethtool_ops; + memcpy(cpu_ops, &cpu_dp->ethtool_ops, sizeof(struct ethtool_ops)); dsa_cpu_port_ethtool_init(cpu_ops); master->ethtool_ops = cpu_ops; @@ -133,15 +135,9 @@ int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds) return 0; } -void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) +void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) { - struct net_device *master; - - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; - - master->ethtool_ops = ds->dst->master_orig_ethtool_ops; + cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; } void dsa_cpu_dsa_destroy(struct dsa_port *port) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 7796580e99ee..56e46090526b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -18,7 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/of.h> #include <linux/of_net.h> -#include <net/dsa.h> + #include "dsa_priv.h" static LIST_HEAD(dsa_switch_trees); @@ -214,66 +214,59 @@ static int dsa_dst_complete(struct dsa_switch_tree *dst) return 0; } -static int dsa_dsa_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_dsa_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, index); + err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); if (err) { dev_warn(ds->dev, "Failed to setup dsa port %d: 
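
dsa_cpu_port_ethtool_setup() above keeps its old mechanism but moves the saved state from the tree to the CPU port itself (cpu_dp->ethtool_ops and cpu_dp->orig_ethtool_ops): copy the master device's ethtool_ops, override a few hooks so the switch's statistics can be appended to the master's, install the copy, and restore the original pointer on teardown. In miniature, with illustrative demo_* names:

#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/string.h>

static const struct ethtool_ops *demo_saved_ops;        /* kept for restore */

static int demo_get_sset_count(struct net_device *dev, int sset)
{
        return 0;       /* placeholder: would add the switch's counters */
}

static void demo_wrap_ethtool(struct net_device *master,
                              struct ethtool_ops *copy)
{
        memcpy(copy, master->ethtool_ops, sizeof(*copy));
        demo_saved_ops = master->ethtool_ops;
        copy->get_sset_count = demo_get_sset_count;     /* splice in a hook */
        master->ethtool_ops = copy;
}

static void demo_restore_ethtool(struct net_device *master)
{
        master->ethtool_ops = demo_saved_ops;
}
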
%d\n", - index, err); + port->index, err); return err; } - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - return devlink_port_register(ds->devlink, - &ds->ports[index].devlink_port, - index); + return devlink_port_register(ds->devlink, &port->devlink_port, + port->index); } -static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_dsa_port_unapply(struct dsa_port *port) { - devlink_port_unregister(&ds->ports[index].devlink_port); + devlink_port_unregister(&port->devlink_port); dsa_cpu_dsa_destroy(port); } -static int dsa_cpu_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_cpu_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, index); + err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); if (err) { dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", - index, err); + port->index, err); return err; } - ds->cpu_port_mask |= BIT(index); - - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); - err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, - index); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + err = devlink_port_register(ds->devlink, &port->devlink_port, + port->index); return err; } -static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_cpu_port_unapply(struct dsa_port *port) { - devlink_port_unregister(&ds->ports[index].devlink_port); + devlink_port_unregister(&port->devlink_port); dsa_cpu_dsa_destroy(port); - ds->cpu_port_mask &= ~BIT(index); + port->ds->cpu_port_mask &= ~BIT(port->index); } -static int dsa_user_port_apply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static int dsa_user_port_apply(struct dsa_port *port) { + struct dsa_switch *ds = port->ds; const char *name = port->name; int err; @@ -282,35 +275,32 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, if (!name) name = "eth%d"; - err = dsa_slave_create(ds, ds->dev, index, name); + err = dsa_slave_create(ds, ds->dev, port->index, name); if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", - index, err); - ds->ports[index].netdev = NULL; + port->index, err); + port->netdev = NULL; return err; } - memset(&ds->ports[index].devlink_port, 0, - sizeof(ds->ports[index].devlink_port)); - err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, - index); + memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + err = devlink_port_register(ds->devlink, &port->devlink_port, + port->index); if (err) return err; - devlink_port_type_eth_set(&ds->ports[index].devlink_port, - ds->ports[index].netdev); + devlink_port_type_eth_set(&port->devlink_port, port->netdev); return 0; } -static void dsa_user_port_unapply(struct dsa_port *port, u32 index, - struct dsa_switch *ds) +static void dsa_user_port_unapply(struct dsa_port *port) { - devlink_port_unregister(&ds->ports[index].devlink_port); - if (ds->ports[index].netdev) { - dsa_slave_destroy(ds->ports[index].netdev); - ds->ports[index].netdev = NULL; - ds->enabled_port_mask &= ~(1 << index); + devlink_port_unregister(&port->devlink_port); + if (port->netdev) { + dsa_slave_destroy(port->netdev); + port->netdev = NULL; + port->ds->enabled_port_mask &= ~(1 << port->index); } } @@ -347,7 +337,7 @@ static int dsa_ds_apply(struct 
dsa_switch_tree *dst, struct dsa_switch *ds) return err; if (ds->ops->set_addr) { - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr); if (err < 0) return err; } @@ -370,20 +360,20 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) continue; if (dsa_port_is_dsa(port)) { - err = dsa_dsa_port_apply(port, index, ds); + err = dsa_dsa_port_apply(port); if (err) return err; continue; } if (dsa_port_is_cpu(port)) { - err = dsa_cpu_port_apply(port, index, ds); + err = dsa_cpu_port_apply(port); if (err) return err; continue; } - err = dsa_user_port_apply(port, index, ds); + err = dsa_user_port_apply(port); if (err) continue; } @@ -402,16 +392,16 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) continue; if (dsa_port_is_dsa(port)) { - dsa_dsa_port_unapply(port, index, ds); + dsa_dsa_port_unapply(port); continue; } if (dsa_port_is_cpu(port)) { - dsa_cpu_port_unapply(port, index, ds); + dsa_cpu_port_unapply(port); continue; } - dsa_user_port_unapply(port, index, ds); + dsa_user_port_unapply(port); } if (ds->slave_mii_bus && ds->ops->phy_read) @@ -443,8 +433,8 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - if (dst->cpu_switch) { - err = dsa_cpu_port_ethtool_setup(dst->cpu_switch); + if (dst->cpu_dp) { + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); if (err) return err; } @@ -454,7 +444,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) * sent to the tag format's receive function. */ wmb(); - dst->master_netdev->dsa_ptr = (void *)dst; + dst->cpu_dp->netdev->dsa_ptr = dst; dst->applied = true; return 0; @@ -468,7 +458,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dst->master_netdev->dsa_ptr = NULL; + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -484,9 +474,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - if (dst->cpu_switch) { - dsa_cpu_port_ethtool_restore(dst->cpu_switch); - dst->cpu_switch = NULL; + if (dst->cpu_dp) { + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp = NULL; } pr_info("DSA: tree %d unapplied\n", dst->tree); @@ -514,15 +504,9 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, if (!ethernet_dev) return -EPROBE_DEFER; - if (!ds->master_netdev) - ds->master_netdev = ethernet_dev; - - if (!dst->master_netdev) - dst->master_netdev = ethernet_dev; - - if (!dst->cpu_switch) { - dst->cpu_switch = ds; - dst->cpu_port = index; + if (!dst->cpu_dp) { + dst->cpu_dp = port; + dst->cpu_dp->netdev = ethernet_dev; } tag_protocol = ds->ops->get_tag_protocol(ds); @@ -534,6 +518,12 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->rcv = dst->tag_ops->rcv; + /* Initialize cpu_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. 
+ */ + ds->cpu_port_mask |= BIT(index); + return 0; } @@ -545,14 +535,22 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) for (index = 0; index < ds->num_ports; index++) { port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) + if (!dsa_port_is_valid(port) || + dsa_port_is_dsa(port)) continue; if (dsa_port_is_cpu(port)) { err = dsa_cpu_parse(port, index, dst, ds); if (err) return err; + } else { + /* Initialize enabled_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + ds->enabled_port_mask |= BIT(index); } + } pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); @@ -563,7 +561,9 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) static int dsa_dst_parse(struct dsa_switch_tree *dst) { struct dsa_switch *ds; + struct dsa_port *dp; u32 index; + int port; int err; for (index = 0; index < DSA_MAX_SWITCHES; index++) { @@ -576,11 +576,28 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) return err; } - if (!dst->master_netdev) { + if (!dst->cpu_dp->netdev) { pr_warn("Tree has no master device\n"); return -EINVAL; } + /* Assign the default CPU port to all ports of the fabric */ + for (index = 0; index < DSA_MAX_SWITCHES; index++) { + ds = dst->ds[index]; + if (!ds) + continue; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + if (!dsa_port_is_valid(dp) || + dsa_port_is_dsa(dp) || + dsa_port_is_cpu(dp)) + continue; + + dp->cpu_dp = dst->cpu_dp; + } + } + pr_info("DSA: tree %d parsed\n", dst->tree); return 0; @@ -601,13 +618,6 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) return -EINVAL; ds->ports[reg].dn = port; - - /* Initialize enabled_port_mask now for ops->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - if (!dsa_port_is_cpu(&ds->ports[reg])) - ds->enabled_port_mask |= 1 << reg; } return 0; @@ -623,14 +633,6 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) continue; ds->ports[i].name = cd->port_names[i]; - - /* Initialize enabled_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. 
- */ - if (!dsa_port_is_cpu(&ds->ports[i])) - ds->enabled_port_mask |= 1 << i; - valid_name_found = true; } @@ -690,10 +692,10 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, return ports; } -static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) +static int _dsa_register_switch(struct dsa_switch *ds) { - struct dsa_chip_data *pdata = dev->platform_data; - struct device_node *np = dev->of_node; + struct dsa_chip_data *pdata = ds->dev->platform_data; + struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; struct device_node *ports; u32 tree, index; @@ -807,12 +809,12 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) } EXPORT_SYMBOL_GPL(dsa_switch_alloc); -int dsa_register_switch(struct dsa_switch *ds, struct device *dev) +int dsa_register_switch(struct dsa_switch *ds) { int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds, dev); + err = _dsa_register_switch(ds); mutex_unlock(&dsa2_mutex); return err; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f4a88e485213..55982cc39b24 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,56 @@ #include <linux/phy.h> #include <linux/netdevice.h> #include <linux/netpoll.h> +#include <net/dsa.h> + +enum { + DSA_NOTIFIER_AGEING_TIME, + DSA_NOTIFIER_BRIDGE_JOIN, + DSA_NOTIFIER_BRIDGE_LEAVE, + DSA_NOTIFIER_FDB_ADD, + DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_MDB_ADD, + DSA_NOTIFIER_MDB_DEL, + DSA_NOTIFIER_VLAN_ADD, + DSA_NOTIFIER_VLAN_DEL, +}; + +/* DSA_NOTIFIER_AGEING_TIME */ +struct dsa_notifier_ageing_time_info { + struct switchdev_trans *trans; + unsigned int ageing_time; +}; + +/* DSA_NOTIFIER_BRIDGE_* */ +struct dsa_notifier_bridge_info { + struct net_device *br; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_FDB_* */ +struct dsa_notifier_fdb_info { + const struct switchdev_obj_port_fdb *fdb; + struct switchdev_trans *trans; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_MDB_* */ +struct dsa_notifier_mdb_info { + const struct switchdev_obj_port_mdb *mdb; + struct switchdev_trans *trans; + int sw_index; + int port; +}; + +/* DSA_NOTIFIER_VLAN_* */ +struct dsa_notifier_vlan_info { + const struct switchdev_obj_port_vlan *vlan; + struct switchdev_trans *trans; + int sw_index; + int port; +}; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -23,6 +73,7 @@ struct dsa_device_ops { }; struct dsa_slave_priv { + /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -52,13 +103,46 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, struct dsa_port *dport, int port); void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); -int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds); -void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds); +int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); +void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); /* legacy.c */ int dsa_legacy_register(void); void dsa_legacy_unregister(void); +/* port.c */ +int dsa_port_set_state(struct dsa_port *dp, u8 state, + struct switchdev_trans *trans); +void dsa_port_set_state_now(struct dsa_port *dp, u8 state); +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); +void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, + struct switchdev_trans 
*trans); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, + struct switchdev_trans *trans); +int dsa_port_fdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans); +int dsa_port_fdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb); +int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb); +int dsa_port_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); +int dsa_port_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb); +int dsa_port_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans); +int dsa_port_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_port_vlan_dump(struct dsa_port *dp, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -75,25 +159,38 @@ void dsa_slave_unregister_notifier(void); int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); +/* tag_brcm.c */ +extern const struct dsa_device_ops brcm_netdev_ops; + /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ extern const struct dsa_device_ops edsa_netdev_ops; -/* tag_trailer.c */ -extern const struct dsa_device_ops trailer_netdev_ops; +/* tag_ksz.c */ +extern const struct dsa_device_ops ksz_netdev_ops; -/* tag_brcm.c */ -extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_lan9303.c */ +extern const struct dsa_device_ops lan9303_netdev_ops; + +/* tag_mtk.c */ +extern const struct dsa_device_ops mtk_netdev_ops; /* tag_qca.c */ extern const struct dsa_device_ops qca_netdev_ops; -/* tag_mtk.c */ -extern const struct dsa_device_ops mtk_netdev_ops; +/* tag_trailer.c */ +extern const struct dsa_device_ops trailer_netdev_ops; -/* tag_lan9303.c */ -extern const struct dsa_device_ops lan9303_netdev_ops; +static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) +{ + return p->dp->cpu_dp->netdev; +} + +static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst) +{ + return dst->cpu_dp; +} #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 7281098df04e..1d7a3282f2a7 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -22,7 +22,7 @@ #include <linux/sysfs.h> #include <linux/phy_fixed.h> #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* switch driver registration ***********************************************/ @@ -95,7 +95,8 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) return 0; } -static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) +static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master, + struct device *parent) { const struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; @@ -115,13 +116,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) continue; if (!strcmp(name, "cpu")) { - if (dst->cpu_switch) { - netdev_err(dst->master_netdev, + if (dst->cpu_dp) { + netdev_err(master, "multiple cpu ports?!\n"); return -EINVAL; } - dst->cpu_switch = ds; - dst->cpu_port = i; + 
dst->cpu_dp = &ds->ports[i]; + dst->cpu_dp->netdev = master; ds->cpu_port_mask |= 1 << i; } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; @@ -144,7 +145,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * tagging protocol to the preferred tagging format of this * switch. */ - if (dst->cpu_switch == ds) { + if (dst->cpu_dp->ds == ds) { enum dsa_tag_protocol tag_protocol; tag_protocol = ops->get_tag_protocol(ds); @@ -169,7 +170,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) return ret; if (ops->set_addr) { - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, master->dev_addr); if (ret < 0) return ret; } @@ -190,23 +191,24 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) */ for (i = 0; i < ds->num_ports; i++) { ds->ports[i].dn = cd->port_dn[i]; + ds->ports[i].cpu_dp = dst->cpu_dp; if (!(ds->enabled_port_mask & (1 << i))) continue; ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); if (ret < 0) - netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", + netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", index, i, cd->port_names[i], ret); } /* Perform configuration of the CPU and DSA ports */ ret = dsa_cpu_dsa_setups(ds, parent); if (ret < 0) - netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", + netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); - ret = dsa_cpu_port_ethtool_setup(ds); + ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp); if (ret) return ret; @@ -214,8 +216,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) } static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct device *host_dev) +dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master, + int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; const struct dsa_switch_ops *ops; @@ -229,11 +231,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, */ ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); if (!ops) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + netdev_err(master, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + netdev_info(master, "[%d]: detected a %s switch\n", index, name); @@ -250,7 +252,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->ops = ops; ds->priv = priv; - ret = dsa_switch_setup_one(ds, parent); + ret = dsa_switch_setup_one(ds, master, parent); if (ret) return ERR_PTR(ret); @@ -576,13 +578,11 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, unsigned configured = 0; dst->pd = pd; - dst->master_netdev = dev; - dst->cpu_port = -1; for (i = 0; i < pd->nr_chips; i++) { struct dsa_switch *ds; - ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); + ds = dsa_switch_setup(dst, dev, i, parent, pd->chip[i].host_dev); if (IS_ERR(ds)) { netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", i, PTR_ERR(ds)); @@ -606,7 +606,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, * sent to the tag format's receive function. 
*/ wmb(); - dev->dsa_ptr = (void *)dst; + dev->dsa_ptr = dst; return 0; } @@ -673,7 +673,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dst->master_netdev->dsa_ptr = NULL; + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype * field, make sure that all packets from this point get sent @@ -688,9 +688,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_switch); + dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dev_put(dst->master_netdev); + dev_put(dst->cpu_dp->netdev); } static int dsa_remove(struct platform_device *pdev) diff --git a/net/dsa/port.c b/net/dsa/port.c new file mode 100644 index 000000000000..efc3bce3a89d --- /dev/null +++ b/net/dsa/port.c @@ -0,0 +1,259 @@ +/* + * Handling of a single switch port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/if_bridge.h> +#include <linux/notifier.h> + +#include "dsa_priv.h" + +static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) +{ + struct raw_notifier_head *nh = &dp->ds->dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (switchdev_trans_ph_prepare(trans)) + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; + + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; + + return 0; +} + +void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +{ + int err; + + err = dsa_port_set_state(dp, state, NULL); + if (err) + pr_err("DSA: failed to set STP state %u (%d)\n", state, err); +} + +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) +{ + struct dsa_notifier_bridge_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .br = br, + }; + int err; + + /* Here the port is already bridged. Reflect the current configuration + * so that drivers can program their chips accordingly. + */ + dp->bridge_dev = br; + + err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info); + + /* The bridging is rolled back on error */ + if (err) + dp->bridge_dev = NULL; + + return err; +} + +void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) +{ + struct dsa_notifier_bridge_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .br = br, + }; + int err; + + /* Here the port is already unbridged. Reflect the current configuration + * so that drivers can program their chips accordingly. 
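
The new net/dsa/port.c funnels every switchdev operation through dsa_port_notify(), a thin wrapper over raw_notifier_call_chain() on the tree's notifier head; each switch in the fabric listens on that chain, and the notifier_from_errno()/notifier_to_errno() pair translates between the chain's NOTIFY_* encoding and plain -errno values. The round trip, reduced to its bones:

#include <linux/notifier.h>

static RAW_NOTIFIER_HEAD(demo_chain);

/* listener: would dispatch on 'event', act on 'info', and encode errno */
static int demo_listener(struct notifier_block *nb, unsigned long event,
                         void *info)
{
        int err = 0;

        return notifier_from_errno(err);
}

static struct notifier_block demo_nb = {
        .notifier_call = demo_listener,
};

/* emitter: publish an event and fold the verdict back into -errno */
static int demo_notify(unsigned long event, void *info)
{
        int err = raw_notifier_call_chain(&demo_chain, event, info);

        return notifier_to_errno(err);
}

raw_notifier_chain_register(&demo_chain, &demo_nb) would wire the listener up. A raw chain does no locking of its own, which DSA can afford because listeners come and go only during serialized switch setup and teardown.
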
+ */ + dp->bridge_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); + + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, + * so allow it to be in BR_STATE_FORWARDING to be kept functional + */ + dsa_port_set_state_now(dp, BR_STATE_FORWARDING); +} + +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + + /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, dp->index, + vlan_filtering); + + return 0; +} + +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, + struct switchdev_trans *trans) +{ + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); + unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); + struct dsa_notifier_ageing_time_info info = { + .ageing_time = ageing_time, + .trans = trans, + }; + + if (switchdev_trans_ph_prepare(trans)) + return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + + dp->ageing_time = ageing_time; + + return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); +} + +int dsa_port_fdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct dsa_notifier_fdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .fdb = fdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); +} + +int dsa_port_fdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_fdb *fdb) +{ + struct dsa_notifier_fdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .fdb = fdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); +} + +int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb); + + return -EOPNOTSUPP; +} + +int dsa_port_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_notifier_mdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .mdb = mdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); +} + +int dsa_port_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_notifier_mdb_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .mdb = mdb, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); +} + +int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb); + + return -EOPNOTSUPP; +} + +int dsa_port_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) +{ + struct dsa_notifier_vlan_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .trans = trans, + .vlan = vlan, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); +} + +int dsa_port_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan) +{ + struct dsa_notifier_vlan_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .vlan = vlan, + }; + + return 
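
The port helpers above preserve switchdev's two-phase contract: while switchdev_trans_ph_prepare(trans) is true they only validate, returning an error while aborting is still cheap, and on the commit pass they apply the change, which is no longer allowed to fail. (dsa_port_vlan_filtering() returns 0 from prepare unconditionally because the bridge deliberately skips -EOPNOTSUPP there.) The generic shape, on a toy device:

#include <net/switchdev.h>

struct demo_dev {
        int value;
        int max;
};

static int demo_attr_set(struct demo_dev *d, int value,
                         struct switchdev_trans *trans)
{
        if (switchdev_trans_ph_prepare(trans))
                return value <= d->max ? 0 : -ERANGE;   /* validate only */

        d->value = value;       /* commit phase: must succeed */
        return 0;
}
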
dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); +} + +int dsa_port_vlan_dump(struct dsa_port *dp, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_switch *ds = dp->ds; + + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb); + + return -EOPNOTSUPP; +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7693182df81e..9507bd38cf04 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,28 +17,16 @@ #include <linux/of_mdio.h> #include <linux/mdio.h> #include <linux/list.h> -#include <net/dsa.h> #include <net/rtnetlink.h> -#include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/netpoll.h> + #include "dsa_priv.h" static bool dsa_slave_dev_check(struct net_device *dev); -static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct raw_notifier_head *nh = &p->dp->ds->dst->nh; - int err; - - err = raw_notifier_call_chain(nh, e, v); - - return notifier_to_errno(err); -} - /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -78,48 +66,16 @@ static int dsa_slave_get_iflink(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - return p->dp->ds->dst->master_netdev->ifindex; -} - -static inline bool dsa_port_is_bridged(struct dsa_port *dp) -{ - return !!dp->bridge_dev; + return dsa_master_netdev(p)->ifindex; } -static void dsa_slave_set_state(struct net_device *dev, u8 state) +static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; struct dsa_switch *ds = dp->ds; - int port = dp->index; - - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, port, state); - - if (ds->ops->port_fast_age) { - /* Fast age FDB entries or flush appropriate forwarding database - * for the given port, if we are moving it from Learning or - * Forwarding state, to Disabled or Blocking or Listening state. - */ - - if ((dp->stp_state == BR_STATE_LEARNING || - dp->stp_state == BR_STATE_FORWARDING) && - (state == BR_STATE_DISABLED || - state == BR_STATE_BLOCKING || - state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); - } - - dp->stp_state = state; -} - -static int dsa_slave_open(struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; - struct dsa_switch *ds = p->dp->ds; - u8 stp_state = dsa_port_is_bridged(p->dp) ? - BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct net_device *master = dsa_master_netdev(p); + u8 stp_state = dp->bridge_dev ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -148,7 +104,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - dsa_slave_set_state(dev, stp_state); + dsa_port_set_state_now(p->dp, stp_state); if (p->phy) phy_start(p->phy); @@ -171,7 +127,7 @@ out: static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); struct dsa_switch *ds = p->dp->ds; if (p->phy) @@ -190,7 +146,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->dp->index, p->phy); - dsa_slave_set_state(dev, BR_STATE_DISABLED); + dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); return 0; } @@ -198,7 +154,7 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); if (change & IFF_ALLMULTI) dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); @@ -209,7 +165,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) static void dsa_slave_set_rx_mode(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); dev_mc_sync(master, dev); dev_uc_sync(master, dev); @@ -218,7 +174,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->dp->ds->dst->master_netdev; + struct net_device *master = dsa_master_netdev(p); struct sockaddr *addr = a; int err; @@ -243,140 +199,6 @@ out: return 0; } -static int dsa_slave_port_vlan_add(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) - return -EOPNOTSUPP; - - return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans); - } - - ds->ops->port_vlan_add(ds, dp->index, vlan, trans); - - return 0; -} - -static int dsa_slave_port_vlan_del(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (!ds->ops->port_vlan_del) - return -EOPNOTSUPP; - - return ds->ops->port_vlan_del(ds, p->dp->index, vlan); -} - -static int dsa_slave_port_vlan_dump(struct net_device *dev, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_vlan_dump) - return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_fdb_add(struct net_device *dev, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_fdb_prepare(ds, 
p->dp->index, fdb, trans); - } - - ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans); - - return 0; -} - -static int dsa_slave_port_fdb_del(struct net_device *dev, - const struct switchdev_obj_port_fdb *fdb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int ret = -EOPNOTSUPP; - - if (ds->ops->port_fdb_del) - ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb); - - return ret; -} - -static int dsa_slave_port_fdb_dump(struct net_device *dev, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_fdb_dump) - return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_mdb_add(struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans); - } - - ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans); - - return 0; -} - -static int dsa_slave_port_mdb_del(struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_mdb_del) - return ds->ops->port_mdb_del(ds, p->dp->index, mdb); - - return -EOPNOTSUPP; -} - -static int dsa_slave_port_mdb_dump(struct net_device *dev, - struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (ds->ops->port_mdb_dump) - return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb); - - return -EOPNOTSUPP; -} - static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -387,96 +209,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -static int dsa_slave_stp_state_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_stp_state_set ? 
0 : -EOPNOTSUPP; - - dsa_slave_set_state(dev, attr->u.stp_state); - - return 0; -} - -static int dsa_slave_vlan_filtering(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ - if (switchdev_trans_ph_prepare(trans)) - return 0; - - if (ds->ops->port_vlan_filtering) - return ds->ops->port_vlan_filtering(ds, p->dp->index, - attr->u.vlan_filtering); - - return 0; -} - -static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds, - unsigned int ageing_time) -{ - int i; - - for (i = 0; i < ds->num_ports; ++i) { - struct dsa_port *dp = &ds->ports[i]; - - if (dp && dp->ageing_time && dp->ageing_time < ageing_time) - ageing_time = dp->ageing_time; - } - - return ageing_time; -} - -static int dsa_slave_ageing_time(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time); - unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - - if (switchdev_trans_ph_prepare(trans)) { - if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) - return -ERANGE; - if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) - return -ERANGE; - return 0; - } - - /* Keep the fastest ageing time in case of multiple bridges */ - p->dp->ageing_time = ageing_time; - ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - - if (ds->ops->set_ageing_time) - return ds->ops->set_ageing_time(ds, ageing_time); - - return 0; -} - static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int ret; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = dsa_slave_stp_state_set(dev, attr, trans); + ret = dsa_port_set_state(dp, attr->u.stp_state, trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - ret = dsa_slave_vlan_filtering(dev, attr, trans); + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, + trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - ret = dsa_slave_ageing_time(dev, attr, trans); + ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); break; default: ret = -EOPNOTSUPP; @@ -490,6 +240,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; /* For the prepare phase, ensure the full set of changes is feasable in @@ -499,18 +251,14 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_add(dev, - SWITCHDEV_OBJ_PORT_FDB(obj), - trans); + err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_add(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), + trans); break; default: err = -EOPNOTSUPP; @@ -523,19 +271,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev, static int 
dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_del(dev, - SWITCHDEV_OBJ_PORT_FDB(obj)); + err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_del(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj)); + err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; default: err = -EOPNOTSUPP; @@ -549,22 +297,19 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, switchdev_obj_dump_cb_t *cb) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_slave_port_fdb_dump(dev, - SWITCHDEV_OBJ_PORT_FDB(obj), - cb); + err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), - cb); + err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_port_vlan_dump(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - cb); + err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb); break; default: err = -EOPNOTSUPP; @@ -574,57 +319,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, return err; } -static int dsa_slave_bridge_port_join(struct net_device *dev, - struct net_device *br) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_notifier_bridge_info info = { - .sw_index = p->dp->ds->index, - .port = p->dp->index, - .br = br, - }; - int err; - - /* Here the port is already bridged. Reflect the current configuration - * so that drivers can program their chips accordingly. - */ - p->dp->bridge_dev = br; - - err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info); - - /* The bridging is rolled back on error */ - if (err) - p->dp->bridge_dev = NULL; - - return err; -} - -static void dsa_slave_bridge_port_leave(struct net_device *dev, - struct net_device *br) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_notifier_bridge_info info = { - .sw_index = p->dp->ds->index, - .port = p->dp->index, - .br = br, - }; - int err; - - /* Here the port is already unbridged. Reflect the current configuration - * so that drivers can program their chips accordingly. - */ - p->dp->bridge_dev = NULL; - - err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info); - if (err) - netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); - - /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, - * so allow it to be in BR_STATE_FORWARDING to be kept functional - */ - dsa_slave_set_state(dev, BR_STATE_FORWARDING); -} - static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { @@ -663,10 +357,14 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - /* Transmit function may have to reallocate the original SKB */ + /* Transmit function may have to reallocate the original SKB, + * in which case it must have freed it. Only free it here on error. 
+ */ nskb = p->xmit(skb, dev); - if (!nskb) + if (!nskb) { + kfree_skb(skb); return NETDEV_TX_OK; + } /* SKB for netpoll still need to be mangled with the protocol-specific * tag to be successfully transmitted @@ -677,7 +375,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType */ - nskb->dev = p->dp->ds->dst->master_netdev; + nskb->dev = dsa_master_netdev(p); dev_queue_xmit(nskb); return NETDEV_TX_OK; @@ -689,12 +387,13 @@ dsa_slave_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - int err = -EOPNOTSUPP; - if (p->phy != NULL) - err = phy_ethtool_ksettings_get(p->phy, cmd); + if (!p->phy) + return -EOPNOTSUPP; - return err; + phy_ethtool_ksettings_get(p->phy, cmd); + + return 0; } static int @@ -821,14 +520,14 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; - s8 cpu_port = dst->cpu_port; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + s8 cpu_port = cpu_dp->index; int count = 0; - if (dst->master_ethtool_ops.get_sset_count) { - count = dst->master_ethtool_ops.get_sset_count(dev, - ETH_SS_STATS); - dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); + if (cpu_dp->ethtool_ops.get_sset_count) { + count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); + cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data); } if (ds->ops->get_ethtool_stats) @@ -838,11 +537,12 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; int count = 0; - if (dst->master_ethtool_ops.get_sset_count) - count += dst->master_ethtool_ops.get_sset_count(dev, sset); + if (cpu_dp->ethtool_ops.get_sset_count) + count += cpu_dp->ethtool_ops.get_sset_count(dev, sset); if (sset == ETH_SS_STATS && ds->ops->get_sset_count) count += ds->ops->get_sset_count(ds); @@ -854,8 +554,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds = dst->cpu_switch; - s8 cpu_port = dst->cpu_port; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + s8 cpu_port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; unsigned int i; @@ -866,10 +567,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; - if (dst->master_ethtool_ops.get_sset_count) { - mcount = dst->master_ethtool_ops.get_sset_count(dev, - ETH_SS_STATS); - dst->master_ethtool_ops.get_strings(dev, stringset, data); + if (cpu_dp->ethtool_ops.get_sset_count) { + mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); + cpu_dp->ethtool_ops.get_strings(dev, stringset, data); } if (stringset == ETH_SS_STATS && ds->ops->get_strings) { @@ -985,8 +685,7 @@ static int dsa_slave_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - struct net_device *master = ds->dst->master_netdev; + 
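
The dsa_slave_xmit() hunk above closes an skb leak and is a good reminder of ndo_start_xmit() semantics: returning NETDEV_TX_OK means the skb has been consumed, so when the tagger's xmit hook fails and hands back NULL without having taken ownership, the caller must free the original itself. Boiled down, with demo_tag() standing in for the tagging protocol's hook:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *demo_tag(struct sk_buff *skb, struct net_device *dev)
{
        return skb;     /* placeholder: a real tagger prepends its header,
                         * possibly reallocating and freeing 'skb' */
}

static netdev_tx_t demo_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct sk_buff *nskb = demo_tag(skb, dev);

        if (!nskb) {
                kfree_skb(skb);         /* failure: drop here, never leak */
                return NETDEV_TX_OK;    /* the skb is gone either way */
        }

        dev_queue_xmit(nskb);
        return NETDEV_TX_OK;
}
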
struct net_device *master = dsa_master_netdev(p); struct netpoll *netpoll; int err = 0; @@ -1138,10 +837,13 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, } static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, - __be16 protocol, struct tc_to_netdev *tc) + u32 chain_index, __be16 protocol, + struct tc_to_netdev *tc) { bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); - int ret = -EOPNOTSUPP; + + if (chain_index) + return -EOPNOTSUPP; switch (tc->type) { case TC_SETUP_MATCHALL: @@ -1155,10 +857,8 @@ static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, return 0; } default: - break; + return -EOPNOTSUPP; } - - return ret; } void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) @@ -1441,11 +1141,11 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, struct net_device *master; struct net_device *slave_dev; struct dsa_slave_priv *p; + struct dsa_port *cpu_dp; int ret; - master = ds->dst->master_netdev; - if (ds->master_netdev) - master = ds->master_netdev; + cpu_dp = ds->dst->cpu_dp; + master = cpu_dp->netdev; slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, NET_NAME_UNKNOWN, ether_setup); @@ -1528,14 +1228,16 @@ static bool dsa_slave_dev_check(struct net_device *dev) static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = p->dp; int err = NOTIFY_DONE; if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) { - err = dsa_slave_bridge_port_join(dev, info->upper_dev); + err = dsa_port_bridge_join(dp, info->upper_dev); err = notifier_from_errno(err); } else { - dsa_slave_bridge_port_leave(dev, info->upper_dev); + dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } } diff --git a/net/dsa/switch.c b/net/dsa/switch.c index ca6e26e514f0..97e2e9c8cf3f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -12,7 +12,47 @@ #include <linux/netdevice.h> #include <linux/notifier.h> -#include <net/dsa.h> +#include <net/switchdev.h> + +#include "dsa_priv.h" + +static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) +{ + int i; + + for (i = 0; i < ds->num_ports; ++i) { + struct dsa_port *dp = &ds->ports[i]; + + if (dp->ageing_time && dp->ageing_time < ageing_time) + ageing_time = dp->ageing_time; + } + + return ageing_time; +} + +static int dsa_switch_ageing_time(struct dsa_switch *ds, + struct dsa_notifier_ageing_time_info *info) +{ + unsigned int ageing_time = info->ageing_time; + struct switchdev_trans *trans = info->trans; + + if (switchdev_trans_ph_prepare(trans)) { + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; + return 0; + } + + /* Program the fastest ageing time in case of multiple bridges */ + ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time); + + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); + + return 0; +} static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) @@ -40,6 +80,137 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return 0; } +static int dsa_switch_fdb_add(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + const struct switchdev_obj_port_fdb *fdb = info->fdb; + struct switchdev_trans *trans = info->trans; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != 
info->sw_index) + return 0; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans); + } + + ds->ops->port_fdb_add(ds, info->port, fdb, trans); + + return 0; +} + +static int dsa_switch_fdb_del(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + const struct switchdev_obj_port_fdb *fdb = info->fdb; + + /* Do not care yet about other switch chips of the fabric */ + if (ds->index != info->sw_index) + return 0; + + if (!ds->ops->port_fdb_del) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_del(ds, info->port, fdb); +} + +static int dsa_switch_mdb_add(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) +{ + const struct switchdev_obj_port_mdb *mdb = info->mdb; + struct switchdev_trans *trans = info->trans; + DECLARE_BITMAP(group, ds->num_ports); + int port, err; + + /* Build a mask of Multicast group members */ + bitmap_zero(group, ds->num_ports); + if (ds->index == info->sw_index) + set_bit(info->port, group); + for (port = 0; port < ds->num_ports; port++) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + set_bit(port, group); + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, group, ds->num_ports) { + err = ds->ops->port_mdb_prepare(ds, port, mdb, trans); + if (err) + return err; + } + } + + for_each_set_bit(port, group, ds->num_ports) + ds->ops->port_mdb_add(ds, port, mdb, trans); + + return 0; +} + +static int dsa_switch_mdb_del(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) +{ + const struct switchdev_obj_port_mdb *mdb = info->mdb; + + if (!ds->ops->port_mdb_del) + return -EOPNOTSUPP; + + if (ds->index == info->sw_index) + return ds->ops->port_mdb_del(ds, info->port, mdb); + + return 0; +} + +static int dsa_switch_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) +{ + const struct switchdev_obj_port_vlan *vlan = info->vlan; + struct switchdev_trans *trans = info->trans; + DECLARE_BITMAP(members, ds->num_ports); + int port, err; + + /* Build a mask of VLAN members */ + bitmap_zero(members, ds->num_ports); + if (ds->index == info->sw_index) + set_bit(info->port, members); + for (port = 0; port < ds->num_ports; port++) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + set_bit(port, members); + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, members, ds->num_ports) { + err = ds->ops->port_vlan_prepare(ds, port, vlan, trans); + if (err) + return err; + } + } + + for_each_set_bit(port, members, ds->num_ports) + ds->ops->port_vlan_add(ds, port, vlan, trans); + + return 0; +} + +static int dsa_switch_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) +{ + const struct switchdev_obj_port_vlan *vlan = info->vlan; + + if (!ds->ops->port_vlan_del) + return -EOPNOTSUPP; + + if (ds->index == info->sw_index) + return ds->ops->port_vlan_del(ds, info->port, vlan); + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -47,12 +218,33 @@ static int dsa_switch_event(struct notifier_block *nb, int err; switch (event) { + case DSA_NOTIFIER_AGEING_TIME: + err = dsa_switch_ageing_time(ds, info); + break; case DSA_NOTIFIER_BRIDGE_JOIN: err = dsa_switch_bridge_join(ds, info); break; case 
DSA_NOTIFIER_BRIDGE_LEAVE: err = dsa_switch_bridge_leave(ds, info); break; + case DSA_NOTIFIER_FDB_ADD: + err = dsa_switch_fdb_add(ds, info); + break; + case DSA_NOTIFIER_FDB_DEL: + err = dsa_switch_fdb_del(ds, info); + break; + case DSA_NOTIFIER_MDB_ADD: + err = dsa_switch_mdb_add(ds, info); + break; + case DSA_NOTIFIER_MDB_DEL: + err = dsa_switch_mdb_del(ds, info); + break; + case DSA_NOTIFIER_VLAN_ADD: + err = dsa_switch_vlan_add(ds, info); + break; + case DSA_NOTIFIER_VLAN_DEL: + err = dsa_switch_vlan_del(ds, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 2a9b52c5af86..c697d9815177 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,7 +12,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* This tag length is 4 bytes, older ones were 6 bytes, we do not @@ -65,7 +65,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev u8 *brcm_tag; if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) - goto out_free; + return NULL; skb_push(skb, BRCM_TAG_LEN); @@ -86,10 +86,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -97,34 +93,33 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; int source_port; u8 *brcm_tag; - ds = dst->cpu_switch; - if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) - goto out_drop; + return NULL; /* skb->data points to the EtherType, the tag is right before it */ brcm_tag = skb->data - 2; /* The opcode should never be different than 0b000 */ if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) - goto out_drop; + return NULL; /* We should never see a reserved reason code without knowing how to * handle it */ if (unlikely(brcm_tag[2] & BRCM_EG_RC_RSVD)) - goto out_drop; + return NULL; /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; /* Validate port against switch setup, either the port is totally */ if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); @@ -137,9 +132,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops brcm_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 1c6633f0de01..12867a4b458f 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define DSA_HLEN 4 @@ -28,7 +28,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb->protocol == htons(ETH_P_8021Q)) { if (skb_cow_head(skb, 0) < 0) - goto out_free; + return NULL; /* * Construct tagged FROM_CPU DSA tag from 802.1q tag. 
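All of the tagger conversions in this series (tag_brcm.c above, tag_dsa.c here, and the edsa/mtk/qca/lan9303 files below) make the same mechanical change: on failure the xmit hook now returns NULL without freeing the skb, because dsa_slave_xmit() has taken over the kfree_skb() on the error path. A minimal sketch of a hypothetical tagger written against the new convention; the names are illustrative and not part of this patch:

#include <linux/etherdevice.h>

#include "dsa_priv.h"

#define EXAMPLE_TAG_LEN 4

static struct sk_buff *example_tag_xmit(struct sk_buff *skb,
					struct net_device *dev)
{
	struct dsa_slave_priv *p = netdev_priv(dev);
	u8 *tag;

	/* Make headroom for the tag; on failure just return NULL and
	 * let dsa_slave_xmit() free the original skb.
	 */
	if (skb_cow_head(skb, EXAMPLE_TAG_LEN) < 0)
		return NULL;

	/* Insert the tag between the MAC addresses and the EtherType */
	skb_push(skb, EXAMPLE_TAG_LEN);
	memmove(skb->data, skb->data + EXAMPLE_TAG_LEN, 2 * ETH_ALEN);

	tag = skb->data + 2 * ETH_ALEN;
	tag[0] = 0;			/* opcode/priority */
	tag[1] = 1 << p->dp->index;	/* destination port bitmap */
	tag[2] = 0;
	tag[3] = 0;

	return skb;
}

Centralizing the free in dsa_slave_xmit() is what lets every tagger drop its duplicated out_free label, as the remaining hunks below do.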
@@ -46,7 +46,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) } } else { if (skb_cow_head(skb, DSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, DSA_HLEN); memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); @@ -62,10 +62,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) } return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, @@ -79,7 +75,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_port; if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) - goto out_drop; + return NULL; /* * The ethertype field is part of the DSA header. @@ -90,7 +86,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, * Check that frame type is either TO_CPU or FORWARD. */ if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) - goto out_drop; + return NULL; /* * Determine source device and port. @@ -103,14 +99,14 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, * port is a registered DSA port. */ if (source_device >= DSA_MAX_SWITCHES) - goto out_drop; + return NULL; ds = dst->ds[source_device]; if (!ds) - goto out_drop; + return NULL; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* * Convert the DSA header to an 802.1q header if the 'tagged' @@ -161,9 +157,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops dsa_netdev_ops = { diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index d9c668aa5e54..67a9d26f9075 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define DSA_HLEN 4 @@ -30,7 +30,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb->protocol == htons(ETH_P_8021Q)) { if (skb_cow_head(skb, DSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, DSA_HLEN); memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); @@ -55,7 +55,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) } } else { if (skb_cow_head(skb, EDSA_HLEN) < 0) - goto out_free; + return NULL; skb_push(skb, EDSA_HLEN); memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); @@ -75,10 +75,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) } return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, @@ -92,7 +88,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_port; if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) - goto out_drop; + return NULL; /* * Skip the two null bytes after the ethertype. @@ -103,7 +99,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, * Check that frame type is either TO_CPU or FORWARD. */ if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) - goto out_drop; + return NULL; /* * Determine source device and port. @@ -116,14 +112,14 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, * port is a registered DSA port. 
*/ if (source_device >= DSA_MAX_SWITCHES) - goto out_drop; + return NULL; ds = dst->ds[source_device]; if (!ds) - goto out_drop; + return NULL; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; /* * If the 'tagged' bit is set, convert the DSA tag to a 802.1q @@ -180,9 +176,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops edsa_netdev_ops = { diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c new file mode 100644 index 000000000000..fab41de8e983 --- /dev/null +++ b/net/dsa/tag_ksz.c @@ -0,0 +1,99 @@ +/* + * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling + * Copyright (c) 2017 Microchip Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +/* For Ingress (Host -> KSZ), 2 bytes are added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : Prioritization (not used now) + * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5) + * + * For Egress (KSZ -> Host), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : zero-based value represents port + * (eg, 0x00=port1, 0x02=port3, 0x06=port7) + */ + +#define KSZ_INGRESS_TAG_LEN 2 +#define KSZ_EGRESS_TAG_LEN 1 + +static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *tag; + + padlen = (skb->len >= ETH_ZLEN) ? 
0 : ETH_ZLEN - skb->len; + + if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { + nskb = skb; + } else { + nskb = alloc_skb(NET_IP_ALIGN + skb->len + + padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC); + if (!nskb) + return NULL; + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, + skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, + skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + } + + /* skb is freed when it fails */ + if (skb_put_padto(nskb, nskb->len + padlen)) + return NULL; + + tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); + tag[0] = 0; + tag[1] = 1 << p->dp->index; /* destination port */ + + return nskb; +} + +static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; + u8 *tag; + int source_port; + + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + + source_port = tag[0] & 7; + if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + return NULL; + + pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); + + skb->dev = ds->ports[source_port].netdev; + + return skb; +} + +const struct dsa_device_ops ksz_netdev_ops = { + .xmit = ksz_xmit, + .rcv = ksz_rcv, +}; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 70130ed5c21a..247774d149f9 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -14,7 +14,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" /* To define the outgoing port and to discover the incoming port a regular @@ -52,7 +52,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) { dev_dbg(&dev->dev, "Cannot make room for the special tag. 
Dropping packet\n"); - goto out_free; + return NULL; } /* provide 'LAN9303_TAG_LEN' bytes additional space */ @@ -66,9 +66,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) lan9303_tag[1] = htons(p->dp->index | BIT(4)); return skb; -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 837cdddb53f0..2f32b7ea3365 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,7 +13,7 @@ */ #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define MTK_HDR_LEN 4 @@ -27,7 +27,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, u8 *mtk_tag; if (skb_cow_head(skb, MTK_HDR_LEN) < 0) - goto out_free; + return NULL; skb_push(skb, MTK_HDR_LEN); @@ -41,10 +41,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[3] = 0; return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -57,7 +53,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, __be16 *phdr, hdr; if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) - goto out_drop; + return NULL; /* The MTK header is added by the switch between src addr * and ethertype at this point, skb->data points to 2 bytes @@ -79,19 +75,16 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, */ ds = dst->ds[0]; if (!ds) - goto out_drop; + return NULL; /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); if (!ds->ports[port].netdev) - goto out_drop; + return NULL; skb->dev = ds->ports[port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops mtk_netdev_ops = { diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 3ba3f59f7a34..1867a3d11f28 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -12,7 +12,7 @@ */ #include <linux/etherdevice.h> -#include <net/dsa.h> + #include "dsa_priv.h" #define QCA_HDR_LEN 2 @@ -45,7 +45,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; if (skb_cow_head(skb, 0) < 0) - goto out_free; + return NULL; skb_push(skb, QCA_HDR_LEN); @@ -60,10 +60,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) *phdr = htons(hdr); return skb; - -out_free: - kfree_skb(skb); - return NULL; } static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, @@ -71,13 +67,14 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds; u8 ver; int port; __be16 *phdr, hdr; if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) - goto out_drop; + return NULL; /* The QCA header is added by the switch between src addr and Ethertype * At this point, skb->data points to ethertype so header should be @@ -89,7 +86,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Make sure the version is correct */ ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; if (unlikely(ver != QCA_HDR_VERSION)) - goto out_drop; + return NULL; /* Remove QCA tag and recalculate checksum */ skb_pull_rcsum(skb, QCA_HDR_LEN); @@ -99,22 +96,19 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* This protocol doesn't support 
cascading multiple switches so it's * safe to assume the switch is first in the tree */ - ds = dst->cpu_switch; + ds = cpu_dp->ds; if (!ds) - goto out_drop; + return NULL; /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); if (!ds->ports[port].netdev) - goto out_drop; + return NULL; /* Update skb & forward the frame accordingly */ skb->dev = ds->ports[port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops qca_netdev_ops = { diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index aafc2fc74c30..b09e56214005 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,7 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> -#include <net/dsa.h> + #include "dsa_priv.h" static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) @@ -32,10 +32,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) padlen = 60 - skb->len; nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); - if (nskb == NULL) { - kfree_skb(skb); + if (!nskb) return NULL; - } skb_reserve(nskb, NET_IP_ALIGN); skb_reset_mac_header(nskb); @@ -45,8 +43,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); if (padlen) { - u8 *pad = skb_put(nskb, padlen); - memset(pad, 0, padlen); + skb_put_zero(nskb, padlen); } trailer = skb_put(nskb, 4); @@ -63,32 +60,28 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; + struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); + struct dsa_switch *ds = cpu_dp->ds; u8 *trailer; int source_port; - ds = dst->cpu_switch; - if (skb_linearize(skb)) - goto out_drop; + return NULL; trailer = skb_tail_pointer(skb) - 4; if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || (trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00) - goto out_drop; + return NULL; source_port = trailer[1] & 7; if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) - goto out_drop; + return NULL; pskb_trim_rcsum(skb, skb->len - 4); skb->dev = ds->ports[source_port].netdev; return skb; - -out_drop: - return NULL; } const struct dsa_device_ops trailer_netdev_ops = { diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 1446810047f5..eaeba9b99a73 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -83,7 +83,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { - struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); + struct ethhdr *eth = skb_push(skb, ETH_HLEN); if (type != ETH_P_802_3 && type != ETH_P_802_2) eth->h_proto = htons(type); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 0a0a392dc2bd..4e7bdb213cd0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -284,12 +284,12 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb_reset_mac_header(skb); if (hsrVer > 0) { - hsr_tag = (typeof(hsr_tag)) skb_put(skb, sizeof(struct hsr_tag)); + hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); hsr_tag->encap_proto = htons(ETH_P_PRP); set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); } - hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(struct hsr_sup_tag)); + hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsrVer ? 
0x0 : 0xf)); set_hsr_stag_HSR_Ver(hsr_stag, hsrVer); @@ -311,7 +311,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_stag->HSR_TLV_Length = hsrVer ? sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ - hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(struct hsr_sup_payload)); + hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 81dac16933fc..b9cce0fd5696 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -33,7 +33,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { * hsr_dev_setup routine has been executed. Nice! */ static int hsr_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net_device *link[2]; unsigned char multicast_spec, hsr_version; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 0a866f332290..de2661cd0328 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -111,7 +111,8 @@ static void lowpan_setup(struct net_device *ldev) ldev->features |= NETIF_F_NETNS_LOCAL; } -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) @@ -121,7 +122,8 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) } static int lowpan_newlink(struct net *src_net, struct net_device *ldev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net_device *wdev; int ret; diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index dbb476d7d38f..e6ff5128e61a 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -121,8 +121,7 @@ lowpan_alloc_frag(struct sk_buff *skb, int size, *mac_cb(frag) = *mac_cb(skb); if (frag1) { - memcpy(skb_put(frag, skb->mac_len), - skb_mac_header(skb), skb->mac_len); + skb_put_data(frag, skb_mac_header(skb), skb->mac_len); } else { rc = wpan_dev_hard_header(frag, wdev, &master_hdr->dest, @@ -152,8 +151,8 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, if (IS_ERR(frag)) return PTR_ERR(frag); - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + skb_put_data(frag, frag_hdr, frag_hdrlen); + skb_put_data(frag, skb_network_header(skb) + offset, len); raw_dump_table(__func__, " fragment dump", frag->data, frag->len); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index eedba7670b51..a60658c85a9a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -301,15 +301,14 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out_skb; skb->dev = dev; - skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: @@ -690,15 +689,14 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out_skb; skb->dev = dev; - skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if 
(err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f83de23a30e7..afcb435adfbe 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_rate.o tcp_recovery.o \ + tcp_rate.o tcp_recovery.o tcp_ulp.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58925b6597de..76c2077c3f5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 22377c8ff14b..37db44f60718 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,5 +1,6 @@ #define pr_fmt(fmt) "IPsec: " fmt +#include <crypto/algapi.h> #include <crypto/hash.h> #include <linux/err.h> #include <linux/module.h> @@ -220,7 +221,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -277,7 +280,7 @@ static void ah_input_done(struct crypto_async_request *base, int err) auth_data = ah_tmp_auth(work_iph, ihl); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out; @@ -393,7 +396,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, ihl); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -413,7 +418,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) goto out_free; } - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG : 0; if (err) goto out_free; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e9f3386a528b..8b52179ddc6e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -539,7 +539,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, skb_reserve(skb, hlen); skb_reset_network_header(skb); - arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); + arp = skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); if (!src_hw) @@ -1113,13 +1113,17 @@ static int arp_invalidate(struct net_device *dev, __be32 ip) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; + struct neigh_table *tbl = &arp_tbl; if (neigh) { if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); + write_lock_bh(&tbl->lock); neigh_release(neigh); + neigh_remove_one(neigh, tbl); + write_unlock_bh(&tbl->lock); } return err; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index ae206163c273..c4c6e1969ed0 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -265,7 +265,7 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - atomic_inc(&entry->lsm_data->refcount); + refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CIPSOV4; @@ -332,7 +332,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, } entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - atomic_inc(&secattr->cache->refcount); + refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); @@ -375,7 +375,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && atomic_read(&iter->refcount)) + if (iter->doi == doi && refcount_read(&iter->refcount)) return iter; return NULL; } @@ -429,7 +429,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, } } - atomic_set(&doi_def->refcount, 1); + refcount_set(&doi_def->refcount, 1); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi)) { @@ -533,7 +533,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!atomic_dec_and_test(&doi_def->refcount)) { + if (!refcount_dec_and_test(&doi_def->refcount)) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -EBUSY; goto doi_remove_return; @@ -576,7 +576,7 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) doi_def = cipso_v4_doi_search(doi); if (!doi_def) goto doi_getdef_return; - if (!atomic_inc_not_zero(&doi_def->refcount)) + if (!refcount_inc_not_zero(&doi_def->refcount)) doi_def = NULL; doi_getdef_return: @@ -597,7 +597,7 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) if (!doi_def) return; - if (!atomic_dec_and_test(&doi_def->refcount)) + if (!refcount_dec_and_test(&doi_def->refcount)) return; spin_lock(&cipso_v4_doi_list_lock); list_del_rcu(&doi_def->list); @@ -630,7 +630,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) - if (atomic_read(&iter_doi->refcount) > 0) { + if (refcount_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c 
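The cipso_ipv4.c hunks above are one instance of the tree-wide atomic_t to refcount_t conversion in this pull request; igmp.c and fib_semantics.c below get the same treatment. A minimal sketch of the pattern, assuming a simple refcounted object whose names are illustrative rather than taken from this patch:

#include <linux/refcount.h>
#include <linux/slab.h>

/* Illustrative only: the mechanical conversion applied across this
 * series. Unlike atomic_t, refcount_t saturates on overflow and WARNs
 * on increment-from-zero, so refcount bugs surface as warnings rather
 * than silent use-after-free.
 */
struct example_obj {
	refcount_t refcnt;
};

static struct example_obj *example_obj_alloc(void)
{
	struct example_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		refcount_set(&obj->refcnt, 1);	/* was: atomic_set() */
	return obj;
}

static void example_obj_hold(struct example_obj *obj)
{
	refcount_inc(&obj->refcnt);		/* was: atomic_inc() */
}

static void example_obj_put(struct example_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcnt))	/* was: atomic_dec_and_test() */
		kfree(obj);
}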
index df14815a3b8c..38d9af9b917c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -176,6 +176,7 @@ EXPORT_SYMBOL(__ip_dev_find); static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); +static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL @@ -251,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Reference in_dev->dev */ dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ - in_dev_hold(in_dev); + refcount_set(&in_dev->refcnt, 1); err = devinet_sysctl_register(in_dev); if (err) { @@ -441,6 +442,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; + struct in_validator_info ivi; + int ret; ASSERT_RTNL(); @@ -471,6 +474,23 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, } } + /* Allow any devices that wish to register ifaddr validators to weigh + * in now, before changes are committed. The rtnl lock is serializing + * access here, so the state should not change between a validator call + * and a final notify on commit. This isn't invoked on promotion under + * the assumption that validators are checking the address itself, and + * not the flags. + */ + ivi.ivi_addr = ifa->ifa_address; + ivi.ivi_dev = ifa->ifa_dev; + ret = blocking_notifier_call_chain(&inetaddr_validator_chain, + NETDEV_UP, &ivi); + ret = notifier_to_errno(ret); + if (ret) { + inet_free_ifa(ifa); + return ret; + } + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { prandom_seed((__force u32) ifa->ifa_local); ifap = last_primary; @@ -1356,6 +1376,19 @@ int unregister_inetaddr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_inetaddr_notifier); +int register_inetaddr_validator_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&inetaddr_validator_chain, nb); +} +EXPORT_SYMBOL(register_inetaddr_validator_notifier); + +int unregister_inetaddr_validator_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&inetaddr_validator_chain, + nb); +} +EXPORT_SYMBOL(unregister_inetaddr_validator_notifier); + /* Rename ifa_labels for a device name change. Make some effort to preserve * existing alias numbering and to create unique labels if possible.
*/ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 93322f895eab..0cbee0a666ff 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } @@ -377,9 +377,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * esp->esph = esph; sg_init_table(sg, esp->nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; if (!esp->inplace) { int allocsize; @@ -403,9 +405,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * spin_unlock_bh(&x->lock); sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -605,7 +609,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); + esph = skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; @@ -690,7 +694,9 @@ skip_cow: esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + err = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 83e3ed258467..4e678fa892dd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -588,13 +588,15 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(net, tb, &cfg); + err = fib_table_delete(net, tb, &cfg, + NULL); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(net, tb, &cfg); + err = fib_table_insert(net, tb, + &cfg, NULL); else err = -ENOBUFS; } @@ -626,14 +628,15 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_config *cfg) + struct nlmsghdr *nlh, struct fib_config *cfg, + struct netlink_ext_ack *extack) { struct nlattr *attr; int err, remaining; struct rtmsg *rtm; err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, - NULL); + extack); if (err < 0) goto errout; @@ -654,6 +657,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_nlinfo.nl_net = net; if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); err = -EINVAL; goto errout; } @@ -681,7 +685,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_MULTIPATH: err = lwtunnel_valid_encap_type_attr(nla_data(attr), - nla_len(attr)); + nla_len(attr), + extack); if (err < 0) goto errout; cfg->fc_mp = nla_data(attr); @@ -698,7 +703,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); - err = 
lwtunnel_valid_encap_type(cfg->fc_encap_type); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, + extack); if (err < 0) goto errout; break; @@ -718,17 +724,18 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_table *tb; int err; - err = rtm_to_fib_config(net, skb, nlh, &cfg); + err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; tb = fib_get_table(net, cfg.fc_table); if (!tb) { + NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; goto errout; } - err = fib_table_delete(net, tb, &cfg); + err = fib_table_delete(net, tb, &cfg, extack); errout: return err; } @@ -741,7 +748,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_table *tb; int err; - err = rtm_to_fib_config(net, skb, nlh, &cfg); + err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; @@ -751,7 +758,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_table_insert(net, tb, &cfg); + err = fib_table_insert(net, tb, &cfg, extack); errout: return err; } @@ -845,9 +852,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(net, tb, &cfg); + fib_table_insert(net, tb, &cfg, NULL); else - fib_table_delete(net, tb, &cfg); + fib_table_delete(net, tb, &cfg, NULL); } void fib_add_ifaddr(struct in_ifaddr *ifa) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 9c02920725db..769ab87ebc4b 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -28,8 +28,10 @@ static inline void fib_alias_accessed(struct fib_alias *fa) /* Exported by fib_semantics.c */ void fib_release_info(struct fib_info *); -struct fib_info *fib_create_info(struct fib_config *cfg); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); +struct fib_info *fib_create_info(struct fib_config *cfg, + struct netlink_ext_ack *extack); +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, + struct netlink_ext_ack *extack); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ad9ad4aab5da..222100103808 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/netlink.h> #include <net/arp.h> #include <net/ip.h> @@ -151,7 +152,8 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) * free_fib_info_rcu() */ - dst_free(&rt->dst); + dst_dev_put(&rt->dst); + dst_release_immediate(&rt->dst); } static void free_nh_exceptions(struct fib_nh *nh) @@ -193,8 +195,10 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) struct rtable *rt; rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); - if (rt) - dst_free(&rt->dst); + if (rt) { + dst_dev_put(&rt->dst); + dst_release_immediate(&rt->dst); + } } free_percpu(rtp); } @@ -456,7 +460,8 @@ static int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, + struct netlink_ext_ack *extack) { int nhs = 0; @@ -466,22 +471,35 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) } /* leftover implies invalid nexthop 
configuration, discard it */ - return remaining > 0 ? 0 : nhs; + if (remaining > 0) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop configuration - extra data after nexthops"); + nhs = 0; + } + + return nhs; } static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, - int remaining, struct fib_config *cfg) + int remaining, struct fib_config *cfg, + struct netlink_ext_ack *extack) { int ret; change_nexthops(fi) { int attrlen; - if (!rtnh_ok(rtnh, remaining)) + if (!rtnh_ok(rtnh, remaining)) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop configuration - extra data after nexthop"); return -EINVAL; + } - if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { + NL_SET_ERR_MSG(extack, + "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); return -EINVAL; + } nexthop_nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; @@ -507,13 +525,17 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); - if (!nla_entype) + if (!nla_entype) { + NL_SET_BAD_ATTR(extack, nla); + NL_SET_ERR_MSG(extack, + "Encap type is missing"); goto err_inval; + } ret = lwtunnel_build_state(nla_get_u16( nla_entype), nla, AF_INET, cfg, - &lwtstate); + &lwtstate, extack); if (ret) goto errout; nexthop_nh->nh_lwtstate = @@ -595,7 +617,8 @@ static inline void fib_add_weight(struct fib_info *fi, static int fib_encap_match(u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, - const struct fib_config *cfg) + const struct fib_config *cfg, + struct netlink_ext_ack *extack) { struct lwtunnel_state *lwtstate; int ret, result = 0; @@ -603,8 +626,8 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, - AF_INET, cfg, &lwtstate); + ret = lwtunnel_build_state(encap_type, encap, AF_INET, + cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); lwtstate_free(lwtstate); @@ -613,7 +636,8 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, + struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH struct rtnexthop *rtnh; @@ -625,9 +649,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) if (cfg->fc_oif || cfg->fc_gw) { if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, - cfg->fc_encap, fi->fib_nh, cfg)) - return 1; + if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, + fi->fib_nh, cfg, extack)) + return 1; } if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) @@ -716,7 +740,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) * |-> {local prefix} (terminal node) */ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh) + struct fib_nh *nh, struct netlink_ext_ack *extack) { int err = 0; struct net *net; @@ -729,16 +753,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (nh->nh_flags & RTNH_F_ONLINK) { unsigned int addr_type; - if (cfg->fc_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid scope"); return -EINVAL; + } dev = __dev_get_by_index(net, nh->nh_oif); if (!dev) return -ENODEV; - if (!(dev->flags & IFF_UP)) + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, + "Nexthop device is 
not up"); return -ENETDOWN; + } addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); - if (addr_type != RTN_UNICAST) + if (addr_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway"); return -EINVAL; + } if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; @@ -778,18 +811,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } if (err) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway"); rcu_read_unlock(); return err; } } err = -EINVAL; - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; + } nh->nh_scope = res.scope; nh->nh_oif = FIB_RES_OIF(res); nh->nh_dev = dev = FIB_RES_DEV(res); - if (!dev) + if (!dev) { + NL_SET_ERR_MSG(extack, + "No egress device for nexthop gateway"); goto out; + } dev_hold(dev); if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; @@ -797,17 +837,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } else { struct in_device *in_dev; - if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) + if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { + NL_SET_ERR_MSG(extack, + "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); return -EINVAL; - + } rcu_read_lock(); err = -ENODEV; in_dev = inetdev_by_index(net, nh->nh_oif); if (!in_dev) goto out; err = -ENETDOWN; - if (!(in_dev->dev->flags & IFF_UP)) + if (!(in_dev->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); goto out; + } nh->nh_dev = in_dev->dev; dev_hold(nh->nh_dev); nh->nh_scope = RT_SCOPE_HOST; @@ -982,7 +1026,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) return 0; } -struct fib_info *fib_create_info(struct fib_config *cfg) +struct fib_info *fib_create_info(struct fib_config *cfg, + struct netlink_ext_ack *extack) { int err; struct fib_info *fi = NULL; @@ -994,15 +1039,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; /* Fast check to catch the most weird cases */ - if (fib_props[cfg->fc_type].scope > cfg->fc_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { + NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; + } - if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { + NL_SET_ERR_MSG(extack, + "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); goto err_inval; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { - nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); if (nhs == 0) goto err_inval; } @@ -1065,18 +1115,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); if (err != 0) goto failure; - if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) { + NL_SET_ERR_MSG(extack, + "Nexthop device index does not match RTA_OIF"); goto err_inval; - if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) + } + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) { + NL_SET_ERR_MSG(extack, + "Nexthop gateway does not match RTA_GATEWAY"); goto err_inval; + } #ifdef CONFIG_IP_ROUTE_CLASSID - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid 
!= cfg->fc_flow) { + NL_SET_ERR_MSG(extack, + "Nexthop class id does not match RTA_FLOW"); goto err_inval; + } #endif #else + NL_SET_ERR_MSG(extack, + "Multipath support not enabled in kernel"); goto err_inval; #endif } else { @@ -1085,11 +1146,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; - if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) + if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) { + NL_SET_ERR_MSG(extack, + "LWT encap type not specified"); goto err_inval; + } err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET, cfg, - &lwtstate); + &lwtstate, extack); if (err) goto failure; @@ -1109,8 +1173,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } if (fib_props[cfg->fc_type].error) { - if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) { + NL_SET_ERR_MSG(extack, + "Gateway, device and multipath can not be specified for this route type"); goto err_inval; + } goto link_it; } else { switch (cfg->fc_type) { @@ -1121,19 +1188,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg) case RTN_MULTICAST: break; default: + NL_SET_ERR_MSG(extack, "Invalid route type"); goto err_inval; } } - if (cfg->fc_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) { + NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; + } if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ - if (nhs != 1 || nh->nh_gw) + if (nhs != 1) { + NL_SET_ERR_MSG(extack, + "Route with host scope can not have multiple nexthops"); goto err_inval; + } + if (nh->nh_gw) { + NL_SET_ERR_MSG(extack, + "Route with host scope can not have a gateway"); + goto err_inval; + } nh->nh_scope = RT_SCOPE_NOWHERE; nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; @@ -1143,7 +1221,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int linkdown = 0; change_nexthops(fi) { - err = fib_check_nh(cfg, fi, nexthop_nh); + err = fib_check_nh(cfg, fi, nexthop_nh, extack); if (err != 0) goto failure; if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) @@ -1153,8 +1231,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags |= RTNH_F_LINKDOWN; } - if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) + if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { + NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); goto err_inval; + } change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); @@ -1173,7 +1253,7 @@ link_it: } fi->fib_treeref++; - atomic_inc(&fi->fib_clntref); + refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51182ff2b441..64668c69dda6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1099,9 +1099,25 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, return 0; } +static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) +{ + if (plen > KEYLENGTH) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + return false; + } + + if ((plen < KEYLENGTH) && (key << plen)) { + NL_SET_ERR_MSG(extack, + "Invalid prefix for given prefix length"); + return false; + } + + return true; +} + /* Caller must hold RTNL. 
*/ int fib_table_insert(struct net *net, struct fib_table *tb, - struct fib_config *cfg) + struct fib_config *cfg, struct netlink_ext_ack *extack) { enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; @@ -1115,17 +1131,14 @@ int fib_table_insert(struct net *net, struct fib_table *tb, u32 key; int err; - if (plen > KEYLENGTH) - return -EINVAL; - key = ntohl(cfg->fc_dst); - pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); - - if ((plen < KEYLENGTH) && (key << plen)) + if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; - fi = fib_create_info(cfg); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); + + fi = fib_create_info(cfg, extack); if (IS_ERR(fi)) { err = PTR_ERR(fi); goto err; @@ -1450,8 +1463,9 @@ found: } if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); + res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; res->type = fa->fa_type; @@ -1507,7 +1521,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, /* Caller must hold RTNL. */ int fib_table_delete(struct net *net, struct fib_table *tb, - struct fib_config *cfg) + struct fib_config *cfg, struct netlink_ext_ack *extack) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1517,12 +1531,9 @@ int fib_table_delete(struct net *net, struct fib_table *tb, u8 tos = cfg->fc_tos; u32 key; - if (plen > KEYLENGTH) - return -EINVAL; - key = ntohl(cfg->fc_dst); - if ((plen < KEYLENGTH) && (key << plen)) + if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; l = fib_find_node(t, &tp, key); @@ -1551,7 +1562,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { + fib_nh_match(cfg, fi, extack) == 0) { fa_to_delete = fa; break; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 805f6607f8d9..8e0257d01200 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <net/genetlink.h> #include <net/gue.h> +#include <net/fou.h> #include <net/ip.h> #include <net/protocol.h> #include <net/udp.h> @@ -859,25 +860,6 @@ size_t gue_encap_hlen(struct ip_tunnel_encap *e) } EXPORT_SYMBOL(gue_encap_hlen); -static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, - struct flowi4 *fl4, u8 *protocol, __be16 sport) -{ - struct udphdr *uh; - - skb_push(skb, sizeof(struct udphdr)); - skb_reset_transport_header(skb); - - uh = udp_hdr(skb); - - uh->dest = e->dport; - uh->source = sport; - uh->len = htons(skb->len); - udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, - fl4->saddr, fl4->daddr, skb->len); - - *protocol = IPPROTO_UDP; -} - int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type) { @@ -894,24 +876,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(__fou_build_header); -int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) -{ - int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? 
SKB_GSO_UDP_TUNNEL_CSUM : - SKB_GSO_UDP_TUNNEL; - __be16 sport; - int err; - - err = __fou_build_header(skb, e, protocol, &sport, type); - if (err) - return err; - - fou_build_udp(skb, e, fl4, protocol, sport); - - return 0; -} -EXPORT_SYMBOL(fou_build_header); - int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type) { @@ -985,8 +949,46 @@ int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(__gue_build_header); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) +#ifdef CONFIG_NET_FOU_IP_TUNNELS + +static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, + struct flowi4 *fl4, u8 *protocol, __be16 sport) +{ + struct udphdr *uh; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + uh = udp_hdr(skb); + + uh->dest = e->dport; + uh->source = sport; + uh->len = htons(skb->len); + udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, + fl4->saddr, fl4->daddr, skb->len); + + *protocol = IPPROTO_UDP; +} + +static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) +{ + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; + __be16 sport; + int err; + + err = __fou_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou_build_udp(skb, e, fl4, protocol, sport); + + return 0; +} + +static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) { int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; @@ -1001,9 +1003,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(gue_build_header); -#ifdef CONFIG_NET_FOU_IP_TUNNELS static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 9144fa7df2ad..c2be26b98b5f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -489,7 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key_hash(net, fl4, skb_in); + rt = ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ec9a396fa466..28f14afd0dd3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_ma_put(struct ip_mc_list *im) { - if (atomic_dec_and_test(&im->refcnt)) { + if (refcount_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); kfree_rcu(im, rcu); } @@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) - atomic_dec(&im->refcnt); + refcount_dec(&im->refcnt); im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; @@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) - atomic_inc(&im->refcnt); + refcount_inc(&im->refcnt); } static void igmp_gq_start_timer(struct in_device *in_dev) @@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) spin_unlock_bh(&im->lock); return; } - atomic_dec(&im->refcnt); + refcount_dec(&im->refcnt); } igmp_start_timer(im, max_delay); 
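The igmp hunks above are part of the tree-wide atomic_t to refcount_t conversion mentioned in the merge message: refcount_t warns and saturates on overflow and on increment-from-zero instead of silently wrapping. A condensed sketch of the lifetime pattern the converted objects follow (the struct is illustrative; the refcount_* calls are the real API):

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_obj {
        refcount_t refcnt;
};

static struct demo_obj *demo_alloc(void)
{
        struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (obj)
                refcount_set(&obj->refcnt, 1);  /* initial reference */
        return obj;
}

static void demo_get(struct demo_obj *obj)
{
        refcount_inc(&obj->refcnt);             /* saturates, never wraps to 0 */
}

static void demo_put(struct demo_obj *obj)
{
        if (refcount_dec_and_test(&obj->refcnt))
                kfree(obj);
}

Lookup paths that can race with the final put use refcount_inc_not_zero() instead of refcount_inc(), as the inet_fragment, inet_hashtables and inetpeer hunks below do.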
spin_unlock_bh(&im->lock); @@ -414,7 +414,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, skb = igmpv3_newpack(dev, dev->mtu); if (!skb) return NULL; - pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec)); + pgr = skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; pgr->grec_nsrcs = 0; @@ -508,7 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, } if (!skb) return NULL; - psrc = (__be32 *)skb_put(skb, sizeof(__be32)); + psrc = skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -742,7 +742,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, ((u8 *)&iph[1])[2] = 0; ((u8 *)&iph[1])[3] = 0; - ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + ih = skb_put(skb, sizeof(struct igmphdr)); ih->type = type; ih->code = 0; ih->csum = 0; @@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; im->sfcount[MCAST_EXCLUDE] = 1; - atomic_set(&im->refcnt, 1); + refcount_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1054d330bf9d..4089c013cb03 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -25,6 +25,7 @@ #include <net/xfrm.h> #include <net/tcp.h> #include <net/sock_reuseport.h> +#include <net/addrconf.h> #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -755,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, * are committed to memory and refcnt initialized. 
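The add_grhead()/add_grec()/igmp_send_report() hunks above drop the casts on skb_put() because the function now returns void * rather than unsigned char *; skb_put_zero() additionally clears the reserved bytes, replacing the skb_put() + memset() pair seen later in ipconfig.c. A small sketch, assuming the skb has enough tailroom:

#include <linux/igmp.h>
#include <linux/skbuff.h>

static void demo_fill(struct sk_buff *skb)
{
        struct igmphdr *ih;

        /* no cast needed: skb_put() returns void * */
        ih = skb_put(skb, sizeof(*ih));
        ih->type = IGMP_HOST_MEMBERSHIP_REPORT;

        /* reserve and zero in one call instead of skb_put() + memset() */
        skb_put_zero(skb, 16);
}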
*/ smp_wmb(); - atomic_set(&req->rsk_refcnt, 2 + 1); + refcount_set(&req->rsk_refcnt, 2 + 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, @@ -789,7 +790,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); - newsk->sk_write_space = sk_stream_write_space; /* listeners have SOCK_RCU_FREE, not the children */ sock_reset_flag(newsk, SOCK_RCU_FREE); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b5e9317eaf9e..96e95e83cc61 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -276,11 +276,11 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) { if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); + refcount_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); - atomic_dec(&fq->refcnt); + refcount_dec(&fq->refcnt); } } EXPORT_SYMBOL(inet_frag_kill); @@ -329,7 +329,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, */ hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); qp_in->flags |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); @@ -339,9 +339,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, #endif qp = qp_in; if (!mod_timer(&qp->timer, jiffies + nf->timeout)) - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); spin_unlock(&hb->chain_lock); @@ -370,7 +370,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); - atomic_set(&q->refcnt, 1); + refcount_set(&q->refcnt, 1); return q; } @@ -405,7 +405,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, spin_lock(&hb->chain_lock); hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { - atomic_inc(&q->refcnt); + refcount_inc(&q->refcnt); spin_unlock(&hb->chain_lock); return q; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e9a59d2d91d4..2e3389d614d1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -43,7 +43,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr, /* This function handles inet_sock, but also timewait and request sockets * for IPv4/IPv6. 
*/ -u32 sk_ehashfn(const struct sock *sk) +static u32 sk_ehashfn(const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6 && @@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* All sockets share common refcount, but have different destructors */ void sock_gen_put(struct sock *sk) { - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (sk->sk_state == TCP_TIME_WAIT) @@ -287,7 +287,7 @@ begin: continue; if (likely(INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f8aff2c71cde..5b039159e67a 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw) void inet_twsk_put(struct inet_timewait_sock *tw) { - if (atomic_dec_and_test(&tw->tw_refcnt)) + if (refcount_dec_and_test(&tw->tw_refcnt)) inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); @@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields. */ - atomic_set(&tw->tw_refcnt, 4); + refcount_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, * to a non null value before everything is setup for this * timewait socket. */ - atomic_set(&tw->tw_refcnt, 0); + refcount_set(&tw->tw_refcnt, 0); __module_get(tw->tw_prot->owner); } @@ -278,7 +278,7 @@ restart: atomic_read(&twsk_net(tw)->count)) continue; - if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) continue; if (unlikely((tw->tw_family != family) || diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86fa45809540..c5a117cc6619 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -115,7 +115,7 @@ static void inetpeer_gc_worker(struct work_struct *work) n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - if (!atomic_read(&p->refcnt)) { + if (refcount_read(&p->refcnt) == 1) { list_del(&p->gc_list); kmem_cache_free(peer_cachep, p); } @@ -202,10 +202,11 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int cmp = inetpeer_addr_cmp(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was - * deleted (refcnt=-1) + * deleted (refcnt=0) */ - if (!atomic_add_unless(&u->refcnt, 1, -1)) + if (!refcount_inc_not_zero(&u->refcnt)) { u = NULL; + } return u; } if (cmp == -1) @@ -382,11 +383,10 @@ static int inet_peer_gc(struct inet_peer_base *base, while (stackptr > stack) { stackptr--; p = rcu_deref_locked(**stackptr, base); - if (atomic_read(&p->refcnt) == 0) { + if (refcount_read(&p->refcnt) == 1) { smp_rmb(); delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && - atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { + if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { p->gc_next = gchead; gchead = p; } @@ -432,7 +432,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, relookup: p = lookup(daddr, stack, base); if (p != peer_avl_empty) { - atomic_inc(&p->refcnt); + refcount_inc(&p->refcnt); write_sequnlock_bh(&base->lock); return 
p; } @@ -444,7 +444,7 @@ relookup: p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { p->daddr = *daddr; - atomic_set(&p->refcnt, 1); + refcount_set(&p->refcnt, 2); atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; @@ -468,7 +468,7 @@ void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; smp_mb__before_atomic(); - atomic_dec(&p->refcnt); + refcount_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b3cdeec85f1f..9a8cfac503dc 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -312,7 +312,7 @@ static int ip_frag_reinit(struct ipq *qp) unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { - atomic_inc(&qp->q.refcnt); + refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e90c80a548ad..7a7829e839c2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -592,7 +592,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, struct iphdr *iph; struct gre_base_hdr *greh; - iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + iph = skb_push(skb, t->hlen + sizeof(*iph)); greh = (struct gre_base_hdr *)(iph+1); greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); greh->protocol = htons(type); @@ -779,7 +779,8 @@ static struct pernet_operations ipgre_net_ops = { .size = sizeof(struct ip_tunnel_net), }; -static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { __be16 flags; @@ -802,7 +803,8 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { __be32 daddr; @@ -823,7 +825,7 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) } out: - return ipgre_tunnel_validate(tb, data); + return ipgre_tunnel_validate(tb, data, extack); } static int ipgre_netlink_parms(struct net_device *dev, @@ -957,7 +959,8 @@ static void ipgre_tap_setup(struct net_device *dev) } static int ipgre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; @@ -979,7 +982,8 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; @@ -1155,7 +1159,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t = netdev_priv(dev); t->collect_md = true; - err = ipgre_newlink(net, dev, tb, NULL); + err = ipgre_newlink(net, dev, tb, NULL, NULL); if (err < 0) { free_netdev(dev); return ERR_PTR(err); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 532b36e9ce2a..7eb252dcecee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -173,7 +173,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, } skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + if (!skb->mark) + skb->mark = sk->sk_mark; /* Send it 
out. */ return ip_local_out(net, skb->sk, skb); @@ -1037,7 +1038,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, @@ -1145,7 +1146,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; @@ -1369,7 +1370,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->len += len; skb->data_len += len; skb->truesize += len; - atomic_add(len, &sk->sk_wmem_alloc); + refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index baf196eaf1d8..2f39479be92f 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -134,10 +134,12 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, struct metadata_dst *res; struct ip_tunnel_info *dst, *src; - if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX) + if (!md || md->type != METADATA_IP_TUNNEL || + md->u.tun_info.mode & IP_TUNNEL_INFO_TX) + return NULL; - res = metadata_dst_alloc(0, flags); + res = metadata_dst_alloc(0, METADATA_IP_TUNNEL, flags); if (!res) return NULL; @@ -228,14 +230,16 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { static int ip_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *tun_info; struct lwtunnel_state *new_state; struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL); + err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, + extack); if (err < 0) return err; @@ -325,7 +329,8 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { static int ip6_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *tun_info; struct lwtunnel_state *new_state; @@ -333,7 +338,7 @@ static int ip6_tun_build_state(struct nlattr *attr, int err; err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, - NULL); + extack); if (err < 0) return err; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4ec9affb2252..0192c255e508 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -465,7 +465,8 @@ static struct pernet_operations vti_net_ops = { .size = sizeof(struct ip_tunnel_net), }; -static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { return 0; } @@ -503,7 +504,8 @@ static void vti_netlink_parms(struct nlattr *data[], } static int vti_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel_parm parms; __u32 fwmark = 0; @@ -513,7 +515,8 @@ static int vti_newlink(struct net *src_net, struct net_device *dev, } static int vti_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); __u32 fwmark 
= t->fwmark; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c3b12b1c7162..4c5dfe6bd34d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -813,8 +813,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d if (!skb) return; skb_reserve(skb, hlen); - b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); - memset(b, 0, sizeof(struct bootp_pkt)); + b = skb_put_zero(skb, sizeof(struct bootp_pkt)); /* Construct IP header */ skb_reset_network_header(skb); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 1e441c6f2160..fb1ad22b5e29 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -375,7 +375,8 @@ static int ipip_tunnel_init(struct net_device *dev) return ip_tunnel_init(dev); } -static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { u8 proto; @@ -469,7 +470,8 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], } static int ipip_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; @@ -488,7 +490,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8ae425cad818..bb909f1d7537 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -109,6 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); +static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); @@ -669,7 +670,8 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); @@ -972,7 +974,8 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, /* Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct iphdr)); if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - @@ -993,8 +996,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted and netlink. * * Called under mrt_lock. 
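ipmr_cache_report() below now raises an RTM_NEWCACHEREPORT netlink notification alongside the classic mrouted socket upcall: igmpmsg_netlink_msgsize() pre-computes the attribute space and igmpmsg_netlink_event() fills and broadcasts the message. A stripped-down sketch of that notify pattern, with a single hypothetical u32 attribute standing in for the IPMRA_CREPORT_* set:

#include <linux/rtnetlink.h>
#include <net/netlink.h>

static void demo_notify(struct net *net, u32 value)
{
        struct rtgenmsg *rtgenm;
        struct nlmsghdr *nlh;
        struct sk_buff *skb;

        /* header plus one u32 attribute */
        skb = nlmsg_new(NLMSG_ALIGN(sizeof(*rtgenm)) + nla_total_size(4),
                        GFP_ATOMIC);
        if (!skb)
                return;

        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, sizeof(*rtgenm), 0);
        if (!nlh)
                goto err;
        rtgenm = nlmsg_data(nlh);
        rtgenm->rtgen_family = RTNL_FAMILY_IPMR;

        if (nla_put_u32(skb, 1 /* hypothetical attribute id */, value))
                goto cancel;

        nlmsg_end(skb, nlh);
        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
        return;

cancel:
        nlmsg_cancel(skb, nlh);
err:
        kfree_skb(skb);
}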
*/ @@ -1044,7 +1046,7 @@ static int ipmr_cache_report(struct mr_table *mrt, msg->im_vif = vifi; skb_dst_set(skb, dst_clone(skb_dst(pkt))); /* Add our header */ - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; igmp->code = 0; @@ -1060,6 +1062,8 @@ static int ipmr_cache_report(struct mr_table *mrt, return -EINVAL; } + igmpmsg_netlink_event(mrt, skb); + /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); @@ -2339,6 +2343,130 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); } +static size_t igmpmsg_netlink_msgsize(size_t payloadlen) +{ + size_t len = + NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ + + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ + + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ + + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ + /* IPMRA_CREPORT_PKT */ + + nla_total_size(payloadlen) + ; + + return len; +} + +static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +{ + struct net *net = read_pnet(&mrt->net); + struct nlmsghdr *nlh; + struct rtgenmsg *rtgenm; + struct igmpmsg *msg; + struct sk_buff *skb; + struct nlattr *nla; + int payloadlen; + + payloadlen = pkt->len - sizeof(struct igmpmsg); + msg = (struct igmpmsg *)skb_network_header(pkt); + + skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); + if (!skb) + goto errout; + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, + sizeof(struct rtgenmsg), 0); + if (!nlh) + goto errout; + rtgenm = nlmsg_data(nlh); + rtgenm->rtgen_family = RTNL_FAMILY_IPMR; + if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || + nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || + nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, + msg->im_src.s_addr) || + nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, + msg->im_dst.s_addr)) + goto nla_put_failure; + + nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); + if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), + nla_data(nla), payloadlen)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); + return; + +nla_put_failure: + nlmsg_cancel(skb, nlh); +errout: + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); +} + +static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[RTA_MAX + 1]; + struct sk_buff *skb = NULL; + struct mfc_cache *cache; + struct mr_table *mrt; + struct rtmsg *rtm; + __be32 src, grp; + u32 tableid; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; + tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + + mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); + goto errout_free; + } + + /* entries are added/deleted only under RTNL */ + rcu_read_lock(); + cache = ipmr_cache_find(mrt, src, grp); + rcu_read_unlock(); + if (!cache) { + err = -ENOENT; + goto errout_free; + } + + skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout_free; + } + + err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, cache, + RTM_NEWROUTE, 0); + if (err < 0) + goto errout_free; + + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + +errout: + return err; + +errout_free: + kfree_skb(skb); + goto errout; +} + static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -2526,6 +2654,129 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, return ipmr_mfc_delete(tbl, &mfcc, parent); } +static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) +{ + u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); + + if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || + nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || + nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, + mrt->mroute_reg_vif_num) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, + mrt->mroute_do_assert) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) + return false; + + return true; +} + +static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) +{ + struct nlattr *vif_nest; + struct vif_device *vif; + + /* if the VIF doesn't exist just continue */ + if (!VIF_EXISTS(mrt, vifid)) + return true; + + vif = &mrt->vif_table[vifid]; + vif_nest = nla_nest_start(skb, IPMRA_VIF); + if (!vif_nest) + return false; + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || + nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, + IPMRA_VIFA_PAD) || + nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || + nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { + nla_nest_cancel(skb, vif_nest); + return false; + } + nla_nest_end(skb, vif_nest); + + return true; +} + +static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + unsigned int t = 0, s_t; + unsigned int e = 0, s_e; + struct mr_table *mrt; + + s_t = cb->args[0]; + s_e = cb->args[1]; + + ipmr_for_each_table(mrt, net) { + struct nlattr *vifs, *af; + struct ifinfomsg *hdr; + u32 i; + + if (t < s_t) + goto skip_table; + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWLINK, + sizeof(*hdr), NLM_F_MULTI); + if (!nlh) + break; + + hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); + hdr->ifi_family = RTNL_FAMILY_IPMR; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) { + nlmsg_cancel(skb, nlh); + goto out; + } + + if (!ipmr_fill_table(mrt, skb)) { + nlmsg_cancel(skb, nlh); + goto out; + } + + vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); + if (!vifs) { + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } + for (i = 0; i < mrt->maxvif; i++) { + if (e < s_e) + goto 
skip_entry; + if (!ipmr_fill_vif(mrt, i, skb)) { + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } +skip_entry: + e++; + } + s_e = 0; + e = 0; + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); +skip_table: + t++; + } + +out: + cb->args[1] = e; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2863,11 +3114,14 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - NULL, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, ipmr_rtm_route, NULL, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, ipmr_rtm_route, NULL, NULL); + + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, + NULL, ipmr_rtm_dumplink, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 038f293c2376..7d72decb80f9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -47,7 +47,7 @@ struct clusterip_config { __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ - struct net_device *dev; /* device */ + int ifindex; /* device ifindex */ u_int16_t num_total_nodes; /* total number of nodes */ unsigned long local_nodes; /* node number array */ @@ -57,6 +57,9 @@ struct clusterip_config { enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ struct rcu_head rcu; + + char ifname[IFNAMSIZ]; /* device ifname */ + struct notifier_block notifier; /* refresh c->ifindex in it */ }; #ifdef CONFIG_PROC_FS @@ -98,9 +101,8 @@ clusterip_config_put(struct clusterip_config *c) * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void -clusterip_config_entry_put(struct clusterip_config *c) +clusterip_config_entry_put(struct net *net, struct clusterip_config *c) { - struct net *net = dev_net(c->dev); struct clusterip_net *cn = net_generic(net, clusterip_net_id); local_bh_disable(); @@ -109,8 +111,7 @@ clusterip_config_entry_put(struct clusterip_config *c) spin_unlock(&cn->lock); local_bh_enable(); - dev_mc_del(c->dev, c->clustermac); - dev_put(c->dev); + unregister_netdevice_notifier(&c->notifier); /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, @@ -170,19 +171,55 @@ clusterip_config_init_nodelist(struct clusterip_config *c, set_bit(i->local_nodes[n] - 1, &c->local_nodes); } -static struct clusterip_config * -clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, - struct net_device *dev) +static int +clusterip_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) { - struct net *net = dev_net(dev); + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct clusterip_config *c; + + c = container_of(this, struct clusterip_config, notifier); + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } else 
if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + } + + return NOTIFY_DONE; +} + +static struct clusterip_config * +clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, + __be32 ip, const char *iniface) +{ struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_config *c; + int err; c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return ERR_PTR(-ENOMEM); - c->dev = dev; + strcpy(c->ifname, iniface); + c->ifindex = -1; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; @@ -213,17 +250,27 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { - spin_lock_bh(&cn->lock); - list_del_rcu(&c->list); - spin_unlock_bh(&cn->lock); - kfree(c); - - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto err; } } #endif - return c; + c->notifier.notifier_call = clusterip_netdev_event; + err = register_netdevice_notifier(&c->notifier); + if (!err) + return c; + +#ifdef CONFIG_PROC_FS + proc_remove(c->pde); +err: +#endif + spin_lock_bh(&cn->lock); + list_del_rcu(&c->list); + spin_unlock_bh(&cn->lock); + kfree(c); + + return ERR_PTR(err); } #ifdef CONFIG_PROC_FS @@ -425,14 +472,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) e->ip.iniface); return -ENOENT; } + dev_put(dev); - config = clusterip_config_init(cipinfo, - e->ip.dst.s_addr, dev); - if (IS_ERR(config)) { - dev_put(dev); + config = clusterip_config_init(par->net, cipinfo, + e->ip.dst.s_addr, + e->ip.iniface); + if (IS_ERR(config)) return PTR_ERR(config); - } - dev_mc_add(config->dev, config->clustermac); } } cipinfo->config = config; @@ -458,7 +504,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ - clusterip_config_entry_put(cipinfo->config); + clusterip_config_entry_put(par->net, cipinfo->config); clusterip_config_put(cipinfo->config); @@ -558,10 +604,9 @@ arp_mangle(void *priv, * addresses on different interfacs. 
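The CLUSTERIP rework above stops pinning a struct net_device pointer in the config and tracks the interface by name and ifindex through a netdevice notifier instead, so an interface that unregisters no longer leaves a dangling pointer behind. The general shape of such a notifier, as a sketch with illustrative names:

#include <linux/netdevice.h>
#include <linux/string.h>

struct demo_track {
        int ifindex;
        char ifname[IFNAMSIZ];
        struct notifier_block notifier;
};

static int demo_netdev_event(struct notifier_block *this,
                             unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct demo_track *t = container_of(this, struct demo_track,
                                            notifier);

        switch (event) {
        case NETDEV_REGISTER:
                if (!strcmp(dev->name, t->ifname))
                        t->ifindex = dev->ifindex;      /* (re)bind by name */
                break;
        case NETDEV_UNREGISTER:
                if (dev->ifindex == t->ifindex)
                        t->ifindex = -1;                /* drop stale index */
                break;
        }
        return NOTIFY_DONE;
}

/* wired up with: t->notifier.notifier_call = demo_netdev_event;
 * register_netdevice_notifier(&t->notifier);
 */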
However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ - if (c->dev != state->out) { - pr_debug("not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", - c->dev->name, state->out->name); + if (c->ifindex != state->out->ifindex) { + pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n", + c->ifindex, state->out->ifindex); clusterip_config_put(c); return NF_ACCEPT; } @@ -743,14 +788,20 @@ static const struct file_operations clusterip_proc_fops = { static int clusterip_net_init(struct net *net) { struct clusterip_net *cn = net_generic(net, clusterip_net_id); + int ret; INIT_LIST_HEAD(&cn->configs); spin_lock_init(&cn->lock); + ret = nf_register_net_hook(net, &cip_arp_ops); + if (ret < 0) + return ret; + #ifdef CONFIG_PROC_FS cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); if (!cn->procdir) { + nf_unregister_net_hook(net, &cip_arp_ops); pr_err("Unable to proc dir entry\n"); return -ENOMEM; } @@ -765,6 +816,7 @@ static void clusterip_net_exit(struct net *net) struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); #endif + nf_unregister_net_hook(net, &cip_arp_ops); } static struct pernet_operations clusterip_net_ops = { @@ -786,17 +838,11 @@ static int __init clusterip_tg_init(void) if (ret < 0) goto cleanup_subsys; - ret = nf_register_hook(&cip_arp_ops); - if (ret < 0) - goto cleanup_target; - pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); return 0; -cleanup_target: - xt_unregister_target(&clusterip_tg_reg); cleanup_subsys: unregister_pernet_subsys(&clusterip_net_ops); return ret; @@ -806,7 +852,6 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); - nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); unregister_pernet_subsys(&clusterip_net_ops); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index af2b69b6895f..f1528f7175a8 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -24,7 +24,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, struct iphdr *iph; skb_reset_network_header(skb); - iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); + iph = skb_put(skb, sizeof(*iph)); iph->version = 4; iph->ihl = sizeof(*iph) / 4; iph->tos = 0; @@ -91,7 +91,7 @@ synproxy_send_client_synack(struct net *net, niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss)); @@ -133,7 +133,7 @@ synproxy_send_server_syn(struct net *net, niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(recv_seq - 1); @@ -178,7 +178,7 @@ synproxy_send_server_ack(struct net *net, niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(ntohl(th->ack_seq)); @@ -216,7 +216,7 @@ synproxy_send_client_ack(struct net *net, niph = synproxy_build_ip(net, nskb, iph->saddr, 
iph->daddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(ntohl(th->seq) + 1); diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index dc1dea15c1b4..f39037fca923 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -98,8 +98,8 @@ static int masq_device_event(struct notifier_block *this, */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_ct_iterate_cleanup_net(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); } return NOTIFY_DONE; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 7cd8d0d918f8..eeacbdaf7cdf 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -51,7 +51,7 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph = skb_put(nskb, sizeof(struct iphdr)); niph->version = 4; niph->ihl = sizeof(struct iphdr) / 4; niph->tos = 0; @@ -76,8 +76,7 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, struct tcphdr *tcph; skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); + tcph = skb_put_zero(nskb, sizeof(struct tcphdr)); tcph->source = oth->dest; tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; @@ -172,7 +171,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) struct iphdr *iph = ip_hdr(skb_in); u8 proto; - if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET)) + if (iph->frag_off & htons(IP_OFFSET)) return; if (skb_csum_unnecessary(skb_in)) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ccfbce13a633..b8f0db54b197 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); - pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter); + pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt)); sk_common_release(sk); } @@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fa44e752a9a3..43eb6567b3a0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO), SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bdffad875691..b0bb5d0a30bd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct 
sock *sp, int i) 0, 0L, 0, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6883b3d4ba8f..c816cd53f7fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -114,6 +114,8 @@ #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include "fib_lookup.h" + #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -587,11 +589,6 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static inline void rt_free(struct rtable *rt) -{ - call_rcu(&rt->dst.rcu_head, dst_rcu_free); -} - static DEFINE_SPINLOCK(fnhe_lock); static void fnhe_flush_routes(struct fib_nh_exception *fnhe) @@ -601,12 +598,14 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe) rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); - rt_free(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); } rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); - rt_free(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); } } @@ -1300,7 +1299,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, - __be32 daddr) + __be32 daddr, const bool do_cache) { bool ret = false; @@ -1329,10 +1328,13 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, if (!rt->rt_gateway) rt->rt_gateway = daddr; - if (!(rt->dst.flags & DST_NOCACHE)) { + if (do_cache) { + dst_hold(&rt->dst); rcu_assign_pointer(*porig, rt); - if (orig) - rt_free(orig); + if (orig) { + dst_dev_put(&orig->dst); + dst_release(&orig->dst); + } ret = true; } @@ -1355,12 +1357,20 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) } orig = *p; + /* hold dst before doing cmpxchg() to avoid race condition + * on this dst + */ + dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { - if (orig) - rt_free(orig); - } else + if (orig) { + dst_dev_put(&orig->dst); + dst_release(&orig->dst); + } + } else { + dst_release(&rt->dst); ret = false; + } return ret; } @@ -1431,7 +1441,8 @@ static bool rt_cache_valid(const struct rtable *rt) static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, struct fib_nh_exception *fnhe, - struct fib_info *fi, u16 type, u32 itag) + struct fib_info *fi, u16 type, u32 itag, + const bool do_cache) { bool cached = false; @@ -1452,8 +1463,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #endif rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); if (unlikely(fnhe)) - cached = rt_bind_exception(rt, fnhe, daddr); - else if (!(rt->dst.flags & DST_NOCACHE)) + cached = rt_bind_exception(rt, fnhe, daddr, do_cache); + else if (do_cache) cached = rt_cache_route(nh, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or @@ -1461,7 +1472,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, * However, if we are unsuccessful at storing this * route into the cache we really need to set it. 
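The rt_bind_exception()/rt_cache_route() hunks above belong to the DST garbage-collection removal: a reference is taken with dst_hold() before the pointer is published, and released again on the losing side of the race, so a reader can never observe a cached route whose refcount already hit zero. The publish side, reduced to its core with demo types (the real code pairs dst_dev_put() with dst_release() when retiring the old entry):

#include <linux/atomic.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_route {
        refcount_t ref;
};

static bool demo_cache_route(struct demo_route **slot, struct demo_route *rt)
{
        struct demo_route *old = *slot;

        refcount_inc(&rt->ref);                 /* hold before publishing */
        if (cmpxchg(slot, old, rt) == old) {
                if (old && refcount_dec_and_test(&old->ref))
                        kfree(old);             /* drop the cache's old ref */
                return true;
        }
        if (refcount_dec_and_test(&rt->ref))
                kfree(rt);                      /* lost the race: undo hold */
        return false;
}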
*/ - rt->dst.flags |= DST_NOCACHE; if (!rt->rt_gateway) rt->rt_gateway = daddr; rt_add_uncached_list(rt); @@ -1484,7 +1494,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | + (will_cache ? 0 : DST_HOST) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1728,7 +1738,8 @@ rt_cache: rth->dst.input = ip_forward; - rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, + do_cache); set_lwt_redirect(rth); skb_dst_set(skb, &rth->dst); out: @@ -1860,9 +1871,9 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, + struct fib_result *res) { - struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); struct ip_tunnel_info *tun_info; struct flowi4 fl4; @@ -1892,8 +1903,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; - res.fi = NULL; - res.table = NULL; + res->fi = NULL; + res->table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1929,17 +1940,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); - err = fib_lookup(net, &fl4, &res, 0); + err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; goto no_route; } - if (res.type == RTN_BROADCAST) + if (res->type == RTN_BROADCAST) goto brd_input; - if (res.type == RTN_LOCAL) { + if (res->type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag); if (err < 0) @@ -1951,10 +1962,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = -EHOSTUNREACH; goto no_route; } - if (res.type != RTN_UNICAST) + if (res->type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -1968,14 +1979,14 @@ brd_input: goto martian_source; } flags |= RTCF_BROADCAST; - res.type = RTN_BROADCAST; + res->type = RTN_BROADCAST; RT_CACHE_STAT_INC(in_brd); local_input: do_cache = false; - if (res.fi) { + if (res->fi) { if (!itag) { - rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -1986,7 +1997,7 @@ local_input: } rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? 
: net->loopback_dev, - flags | RTCF_LOCAL, res.type, + flags | RTCF_LOCAL, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -1996,18 +2007,18 @@ local_input: rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; - if (res.table) - rth->rt_table_id = res.table->tb_id; + if (res->table) + rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); - if (res.type == RTN_UNREACHABLE) { + if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } if (do_cache) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh *nh = &FIB_RES_NH(*res); rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { @@ -2016,10 +2027,8 @@ local_input: rth->dst.input = lwtunnel_input; } - if (unlikely(!rt_cache_route(nh, rth))) { - rth->dst.flags |= DST_NOCACHE; + if (unlikely(!rt_cache_route(nh, rth))) rt_add_uncached_list(rth); - } } skb_dst_set(skb, &rth->dst); err = 0; @@ -2027,9 +2036,9 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); - res.type = RTN_UNREACHABLE; - res.fi = NULL; - res.table = NULL; + res->type = RTN_UNREACHABLE; + res->fi = NULL; + res->table = NULL; goto local_input; /* @@ -2059,11 +2068,22 @@ martian_source: int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { - int res; + struct fib_result res; + int err; tos &= IPTOS_RT_MASK; rcu_read_lock(); + err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(ip_route_input_noref); + +/* called with rcu_read_lock held */ +int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, struct fib_result *res) +{ /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2078,6 +2098,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; + int err = -EINVAL; if (in_dev) our = ip_check_mc_rcu(in_dev, daddr, saddr, @@ -2093,7 +2114,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol); } - res = -EINVAL; if (our #ifdef CONFIG_IP_MROUTE || @@ -2101,17 +2121,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, IN_DEV_MFORWARD(in_dev)) #endif ) { - res = ip_route_input_mc(skb, daddr, saddr, + err = ip_route_input_mc(skb, daddr, saddr, tos, dev, our); } - rcu_read_unlock(); - return res; + return err; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev); - rcu_read_unlock(); - return res; + + return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); } -EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, @@ -2207,10 +2224,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth = rcu_dereference(*prth); rt_cache: - if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); + if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) return rth; - } } add: @@ -2244,7 +2259,7 @@ add: #endif } - rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); + rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); set_lwt_redirect(rth); return rth; @@ -2254,29 +2269,40 @@ add: * Major route resolver routine. 
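ip_route_input_noref() above and ip_route_output_key_hash() below are split the same way: the existing entry point keeps its self-contained locking, while a new *_rcu variant assumes the caller already holds rcu_read_lock(). That lets inet_rtm_getroute() further down run the lookup, the RTM_F_FIB_MATCH fib_dump_info() reply and rt_fill_info() under one RCU section, so the fib_result it reports cannot go stale in between. The wrapper pattern, sketched with placeholder names:

#include <linux/rcupdate.h>

struct demo_result;

/* Assumed worker: caller must hold rcu_read_lock(); the returned
 * object carries its own reference, so it stays valid after the
 * unlock (as the rtable returned by the real *_rcu lookup does).
 */
struct demo_result *demo_lookup_rcu(int key);

static struct demo_result *demo_lookup(int key)
{
        struct demo_result *res;

        rcu_read_lock();
        res = demo_lookup_rcu(key);
        rcu_read_unlock();
        return res;
}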
*/ -struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, - const struct sk_buff *skb) +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + const struct sk_buff *skb) { - struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); - unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int orig_oif; - int err = -ENETUNREACH; res.tclassid = 0; res.fi = NULL; res.table = NULL; - orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); rcu_read_lock(); + rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); + rcu_read_unlock(); + + return rth; +} +EXPORT_SYMBOL_GPL(ip_route_output_key_hash); + +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, + struct fib_result *res, + const struct sk_buff *skb) +{ + struct net_device *dev_out = NULL; + int orig_oif = fl4->flowi4_oif; + unsigned int flags = 0; + struct rtable *rth; + int err = -ENETUNREACH; + if (fl4->saddr) { rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || @@ -2362,15 +2388,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; fl4->flowi4_oif = LOOPBACK_IFINDEX; - res.type = RTN_LOCAL; + res->type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - err = fib_lookup(net, fl4, &res, 0); + err = fib_lookup(net, fl4, res, 0); if (err) { - res.fi = NULL; - res.table = NULL; + res->fi = NULL; + res->table = NULL; if (fl4->flowi4_oif && (ipv4_is_multicast(fl4->daddr) || !netif_index_is_l3_master(net, fl4->flowi4_oif))) { @@ -2395,43 +2421,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (fl4->saddr == 0) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); - res.type = RTN_UNICAST; + res->type = RTN_UNICAST; goto make_route; } rth = ERR_PTR(err); goto out; } - if (res.type == RTN_LOCAL) { + if (res->type == RTN_LOCAL) { if (!fl4->saddr) { - if (res.fi->fib_prefsrc) - fl4->saddr = res.fi->fib_prefsrc; + if (res->fi->fib_prefsrc) + fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; } /* L3 master device is the loopback for that domain */ - dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? : + dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? 
: net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; } - fib_select_path(net, &res, fl4, skb); + fib_select_path(net, res, fl4, skb); - dev_out = FIB_RES_DEV(res); + dev_out = FIB_RES_DEV(*res); fl4->flowi4_oif = dev_out->ifindex; make_route: - rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags); + rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); out: - rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) { @@ -2485,7 +2509,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->input = dst_discard; new->output = dst_discard_out; - new->dev = ort->dst.dev; + new->dev = net->loopback_dev; if (new->dev) dev_hold(new->dev); @@ -2500,7 +2524,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); - dst_free(new); } dst_release(dst_orig); @@ -2525,9 +2548,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); +/* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, - u32 seq, int event) + u32 seq) { struct rtable *rt = skb_rtable(skb); struct rtmsg *r; @@ -2536,7 +2560,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); if (!nlh) return -EMSGSIZE; @@ -2644,6 +2668,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + struct fib_result res = {}; struct rtable *rt = NULL; struct flowi4 fl4; __be32 dst = 0; @@ -2700,10 +2725,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; + rcu_read_lock(); + if (iif) { struct net_device *dev; - dev = __dev_get_by_index(net, iif); + dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; goto errout_free; @@ -2712,14 +2739,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb->protocol = htons(ETH_P_IP); skb->dev = dev; skb->mark = mark; - err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); + err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, + dev, &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - rt = ip_route_output_key(net, &fl4); - + rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); @@ -2735,17 +2762,25 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = rt->rt_table_id; - err = rt_fill_info(net, dst, src, table_id, &fl4, skb, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - RTM_NEWROUTE); + if (rtm->rtm_flags & RTM_F_FIB_MATCH) + err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWROUTE, table_id, + rt->rt_type, res.prefix, res.prefixlen, + fl4.flowi4_tos, res.fi, 0); + else + err = rt_fill_info(net, dst, src, table_id, &fl4, skb, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); if (err < 0) goto errout_free; + rcu_read_unlock(); + err = rtnl_unicast(skb, 
net, NETLINK_CB(in_skb).portid); errout: return err; errout_free: + rcu_read_unlock(); kfree_skb(skb); goto errout; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0257d965f111..0905cf04c2a4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, * Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the resulting initial timestamp is <= tcp_time_stamp. */ -__u32 cookie_init_timestamp(struct request_sock *req) +u64 cookie_init_timestamp(struct request_sock *req) { struct inet_request_sock *ireq; - u32 ts, ts_now = tcp_time_stamp; + u32 ts, ts_now = tcp_time_stamp_raw(); u32 options = 0; ireq = inet_rsk(req); @@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req) ts <<= TSBITS; ts |= options; } - return ts; + return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ); } @@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, NULL, &own_req); if (child) { - atomic_set(&req->rsk_refcnt, 1); + refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); @@ -232,7 +232,8 @@ EXPORT_SYMBOL(tcp_get_cookie_sock); * return false if we decode a tcp option that is disabled * on the host. */ -bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) +bool cookie_timestamp_decode(const struct net *net, + struct tcp_options_received *tcp_opt) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr; @@ -242,12 +243,12 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) return true; } - if (!sysctl_tcp_timestamps) + if (!net->ipv4.sysctl_tcp_timestamps) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !sysctl_tcp_sack) + if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -256,7 +257,7 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt) tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return sysctl_tcp_window_scaling != 0; + return net->ipv4.sysctl_tcp_window_scaling != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -312,14 +313,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tsoff = secure_tcp_ts_off(sock_net(sk), + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(&tcp_opt)) + if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) goto out; ret = NULL; @@ -343,7 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? 
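The syncookies hunk above is part of the move from jiffies-based TCP timestamps to a 1 ms clock: tcp_time_stamp_raw() replaces tcp_time_stamp, and cookie_init_timestamp() now returns a u64 scaled back into the microsecond base that tp->tcp_mstamp uses. A sketch of the unit relationships, assuming TCP_TS_HZ is 1000 and tcp_clock_us() is the kernel's internal microsecond clock (kernel integer types assumed):

#define TCP_TS_HZ      1000
#define USEC_PER_SEC   1000000L

/* RFC 7323 timestamp value: no longer jiffies, but 1 ms ticks */
static inline u32 tcp_time_stamp_raw(void)
{
    return tcp_clock_us() / (USEC_PER_SEC / TCP_TS_HZ);
}

/* cookie_init_timestamp() performs the inverse conversion so the
 * result lands in the same u64 microsecond base as tp->tcp_mstamp:
 */
static inline u64 cookie_ts_to_us(u32 ts)
{
    return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
}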
tcp_opt.rcv_tsval : 0; - treq->snt_synack.v64 = 0; + treq->snt_synack = 0; treq->tfo_listener = false; ireq->ir_iif = inet_request_bound_dev_if(sk, skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 86957e9cd6c6..9bf809726066 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -360,32 +360,30 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) tcp_fastopen_active_timeout_reset(); + + return ret; +} + +static int proc_tcp_available_ulp(struct ctl_table *ctl, + int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + kfree(tbl.data); + return ret; } static struct ctl_table ipv4_table[] = { { - .procname = "tcp_timestamps", - .data = &sysctl_tcp_timestamps, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_window_scaling", - .data = &sysctl_tcp_window_scaling, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_sack", - .data = &sysctl_tcp_sack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_retrans_collapse", .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), @@ -707,6 +705,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_ms_jiffies, }, { + .procname = "tcp_available_ulp", + .maxlen = TCP_ULP_BUF_MAX, + .mode = 0444, + .proc_handler = proc_tcp_available_ulp, + }, + { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, .maxlen = sizeof(int), @@ -1116,6 +1120,27 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &one, }, #endif + { + .procname = "tcp_sack", + .data = &init_net.ipv4.sysctl_tcp_sack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_window_scaling", + .data = &init_net.ipv4.sysctl_tcp_window_scaling, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_timestamps", + .data = &init_net.ipv4.sysctl_tcp_timestamps, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 40aca7803cf2..71ce33decd97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -320,17 +320,36 @@ struct tcp_splice_state { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. 
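The proc_tcp_available_ulp handler added above mirrors the existing tcp_available_congestion_control pattern: allocate a buffer, let the ULP registry fill it, and hand it to proc_dostring(). From userspace the result is a plain read-only proc file; a small reader, which with the kernel TLS module loaded is expected to print "tls":

#include <stdio.h>

int main(void)
{
    char buf[256];
    FILE *f = fopen("/proc/sys/net/ipv4/tcp_available_ulp", "r");

    if (!f)
        return 1;
    if (fgets(buf, sizeof(buf), f))
        printf("available ULPs: %s", buf);
    fclose(f);
    return 0;
}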
*/ -int tcp_memory_pressure __read_mostly; -EXPORT_SYMBOL(tcp_memory_pressure); +unsigned long tcp_memory_pressure __read_mostly; +EXPORT_SYMBOL_GPL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) { - if (!tcp_memory_pressure) { + unsigned long val; + + if (tcp_memory_pressure) + return; + val = jiffies; + + if (!val) + val--; + if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); - tcp_memory_pressure = 1; - } } -EXPORT_SYMBOL(tcp_enter_memory_pressure); +EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); + +void tcp_leave_memory_pressure(struct sock *sk) +{ + unsigned long val; + + if (!tcp_memory_pressure) + return; + val = xchg(&tcp_memory_pressure, 0); + if (val) + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, + jiffies_to_msecs(jiffies - val)); +} +EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -386,7 +405,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -645,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && - atomic_read(&sk->sk_wmem_alloc) > skb->truesize; + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } static void tcp_push(struct sock *sk, int flags, int mss_now, @@ -673,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, /* It is possible TX completion already happened * before we set TSQ_THROTTLED. */ - if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize) return; } @@ -882,8 +901,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct tcp_sock *tp = tcp_sk(sk); int mss_now, size_goal; @@ -1013,6 +1032,7 @@ out_err: } return sk_stream_error(sk, flags, err); } +EXPORT_SYMBOL_GPL(do_tcp_sendpages); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) @@ -2186,7 +2206,7 @@ adjudge_to_death: /* Now socket is owned by kernel and we acquire BH lock - to finish close. No need to check for user refs. + * to finish close. No need to check for user refs. 
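The tcp_enter/leave_memory_pressure() rework above changes tcp_memory_pressure from a boolean into the jiffies value at which pressure began (0 meaning "not under pressure"), so the new TCPMEMORYPRESSURESCHRONO counter can account how long the stack stayed in that state. A condensed sketch of the pattern, in kernel context (cmpxchg/xchg/jiffies are the kernel primitives; the accounting calls are indicated in comments):

static unsigned long pressure;  /* 0, or start time in jiffies */

static void enter_pressure(void)
{
    unsigned long val = jiffies ? jiffies : 1;  /* 0 means "off" */

    if (pressure)
        return;
    if (!cmpxchg(&pressure, 0UL, val)) {
        /* exactly one CPU wins the 0 -> val transition:
         * bump LINUX_MIB_TCPMEMORYPRESSURES here
         */
    }
}

static void leave_pressure(void)
{
    unsigned long val = xchg(&pressure, 0UL);

    if (val) {
        /* add jiffies_to_msecs(jiffies - val) to the CHRONO counter */
    }
}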
*/ local_bh_disable(); bh_lock_sock(sk); @@ -2461,7 +2481,25 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name); + err = tcp_set_congestion_control(sk, name, true); + release_sock(sk); + return err; + } + case TCP_ULP: { + char name[TCP_ULP_NAME_MAX]; + + if (optlen < 1) + return -EINVAL; + + val = strncpy_from_user(name, optval, + min_t(long, TCP_ULP_NAME_MAX - 1, + optlen)); + if (val < 0) + return -EFAULT; + name[val] = 0; + + lock_sock(sk); + err = tcp_set_ulp(sk, name); release_sock(sk); return err; } @@ -2482,7 +2520,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_MAXSEG: /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet - * know which interface is going to be used */ + * know which interface is going to be used + */ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { err = -EINVAL; break; @@ -2677,8 +2716,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: + case TCP_MD5SIG_EXT: /* Read the IP->Key mappings from userspace */ - err = tp->af_specific->md5_parse(sk, optval, optlen); + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_USER_TIMEOUT: @@ -2717,7 +2757,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (!tp->repair) err = -EPERM; else - tp->tsoffset = val - tcp_time_stamp; + tp->tsoffset = val - tcp_time_stamp_raw(); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); @@ -2768,7 +2808,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { stats[i] = tp->chrono_stat[i - 1]; if (i == tp->chrono_type) - stats[i] += tcp_time_stamp - tp->chrono_start; + stats[i] += tcp_jiffies32 - tp->chrono_start; stats[i] *= USEC_PER_SEC / HZ; total += stats[i]; } @@ -2852,7 +2892,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; - now = tcp_time_stamp; + now = tcp_jiffies32; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); @@ -3020,6 +3060,21 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; + case TCP_ULP: + if (get_user(len, optlen)) + return -EFAULT; + len = min_t(unsigned int, len, TCP_ULP_NAME_MAX); + if (!icsk->icsk_ulp_ops) { + if (put_user(0, optlen)) + return -EFAULT; + return 0; + } + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len)) + return -EFAULT; + return 0; + case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3083,7 +3138,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TIMESTAMP: - val = tcp_time_stamp + tp->tsoffset; + val = tcp_time_stamp_raw() + tp->tsoffset; break; case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index b89bce4c721e..dbcc9352a48f 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -52,10 +52,9 @@ * There is a public e-mail list for discussing BBR development and testing: * https://groups.google.com/forum/#!forum/bbr-dev * - * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, - * since pacing is integral to the BBR 
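The TCP_ULP setsockopt added above is the userspace entry point for attaching an upper layer protocol to a TCP socket; the kernel TLS module is the first in-tree user. A minimal sketch, assuming headers that define TCP_ULP (value 31 in this series):

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef TCP_ULP
#define TCP_ULP 31  /* value used by this series */
#endif

int attach_tls_ulp(int fd)
{
    /* fd is a connected TCP socket; the TLS handshake itself is
     * still performed in userspace before record processing is
     * handed to the kernel (TLS_TX crypto parameters follow via
     * a separate setsockopt on the "tls" ULP).
     */
    return setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
}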
design and implementation. - * BBR without pacing would not function properly, and may incur unnecessary - * high packet loss rates. + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. */ #include <linux/module.h> #include <net/tcp.h> @@ -92,7 +91,7 @@ struct bbr { struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ u32 rtt_cnt; /* count of packet-timed rounds elapsed */ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ - struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u64 cycle_mstamp; /* time of this cycle phase start */ u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ @@ -412,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bool is_full_length = - skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > bbr->min_rtt_us; u32 inflight, bw; @@ -498,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); bbr->lt_last_delivered = tp->delivered; bbr->lt_last_lost = tp->lost; bbr->lt_rtt_cnt = 0; @@ -552,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) struct bbr *bbr = inet_csk_ca(sk); u32 lost, delivered; u64 bw; - s32 t; + u32 t; if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ if (bbr->mode == BBR_PROBE_BW && bbr->round_start && @@ -604,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) return; /* Find average delivery rate in this sampling interval. */ - t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); - if (t < 1) - return; /* interval is less than one jiffy, so wait */ - t = jiffies_to_usecs(t); - /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */ - if (t < 1) { + t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; + if ((s32)t < 1) + return; /* interval is less than one ms, so wait */ + /* Check if can multiply without overflow */ + if (t >= ~0U / USEC_PER_MSEC) { bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ return; } + t *= USEC_PER_MSEC; bw = (u64)delivered * BW_UNIT; do_div(bw, t); bbr_lt_bw_interval_done(sk, bw); @@ -731,12 +730,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bool filter_expired; /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_time_stamp, + filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; } if (bbr_probe_rtt_mode_ms > 0 && filter_expired && @@ -755,7 +754,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) /* Maintain min packets in flight for max(200 ms, 1 round). 
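The reworked long-term sampling interval above trades jiffies_to_usecs() for an explicit overflow check: t is now a 32-bit count of milliseconds, and multiplying by USEC_PER_MSEC overflows once t >= 2^32 / 1000 (roughly 49.7 days), so such stale intervals reset sampling instead of producing a bogus bandwidth estimate. An isolated illustration of the guard:

#define USEC_PER_MSEC 1000U

static int interval_to_usecs(unsigned int t_ms, unsigned int *t_us)
{
    if (t_ms >= ~0U / USEC_PER_MSEC)
        return -1;  /* would overflow: caller resets sampling */
    *t_us = t_ms * USEC_PER_MSEC;
    return 0;
}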
*/ if (!bbr->probe_rtt_done_stamp && tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { - bbr->probe_rtt_done_stamp = tcp_time_stamp + + bbr->probe_rtt_done_stamp = tcp_jiffies32 + msecs_to_jiffies(bbr_probe_rtt_mode_ms); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; @@ -763,8 +762,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) if (bbr->round_start) bbr->probe_rtt_round_done = 1; if (bbr->probe_rtt_round_done && - after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_time_stamp; + after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_jiffies32; bbr->restore_cwnd = 1; /* snap to prior_cwnd */ bbr_reset_mode(sk); } @@ -811,7 +810,7 @@ static void bbr_init(struct sock *sk) bbr->probe_rtt_done_stamp = 0; bbr->probe_rtt_round_done = 0; bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ @@ -826,10 +825,12 @@ static void bbr_init(struct sock *sk) bbr->idle_restart = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr->cycle_mstamp.v64 = 0; + bbr->cycle_mstamp = 0; bbr->cycle_idx = 0; bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } static u32 bbr_sndbuf_expand(struct sock *sk) diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 36087bca9f48..609965f0e298 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk) static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) /* record the beginning of an epoch */ - ca->epoch_start = tcp_time_stamp; + ca->epoch_start = tcp_jiffies32; /* start off normal */ if (cwnd <= low_window) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 324c9bcc5456..fde983f6376b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk) INET_ECN_dontxmit(sk); } -static void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca) +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -333,8 +333,12 @@ out: return ret; } -/* Change congestion control for socket */ -int tcp_set_congestion_control(struct sock *sk, const char *name) +/* Change congestion control for socket. If load is false, then it is the + * responsibility of the caller to call tcp_init_congestion_control or + * tcp_reinit_congestion_control (if the current congestion control was + * already initialized. 
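The cmpxchg() at the end of bbr_init() above is the pacing handshake referred to in the softened BBR header comment: BBR requests pacing, but only if the fq qdisc has not already claimed it, and the TCP output path falls back to an internal high-resolution-timer pacer when the status ends up as SK_PACING_NEEDED. A sketch of the three states (values as used by this series):

enum sk_pacing {
    SK_PACING_NONE = 0,  /* nobody asked for pacing */
    SK_PACING_NEEDED,    /* CC wants it: TCP arms its own hrtimer */
    SK_PACING_FQ,        /* fq qdisc paces: TCP stays out of the way */
};

/* bbr_init(): claim pacing unless fq already owns it; the real code
 * uses cmpxchg() since the qdisc can change underneath the socket.
 */
static void request_pacing(enum sk_pacing *status)
{
    if (*status == SK_PACING_NONE)
        *status = SK_PACING_NEEDED;
}

/* transmit path: pace internally only when nobody else does */
static int needs_internal_pacing(enum sk_pacing status)
{
    return status == SK_PACING_NEEDED;
}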
+ */ +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return -EPERM; rcu_read_lock(); - ca = __tcp_ca_find_autoload(name); + if (!load) + ca = tcp_ca_find(name); + else + ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } - if (!ca) + if (!ca) { err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) + } else if (!load) { + icsk->icsk_ca_ops = ca; + if (!try_module_get(ca->owner)) + err = -EBUSY; + } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || + ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { err = -EPERM; - else if (!try_module_get(ca->owner)) + } else if (!try_module_get(ca->owner)) { err = -EBUSY; - else + } else { tcp_reinit_congestion_control(sk, ca); + } out: rcu_read_unlock(); return err; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0683ba447d77..57ae5b5ae643 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; @@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. */ - if (ca->epoch_start && tcp_time_stamp == ca->last_time) + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { - ca->epoch_start = tcp_time_stamp; /* record beginning */ + ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ @@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) * if the cwnd < 1 million packets !!! 
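The new load parameter above separates the classic setsockopt() path, which may autoload a CC module and reinitialize the socket's congestion control immediately, from callers such as BPF that run in contexts where neither module loading nor reinitialization is possible. The familiar userspace path, which maps to tcp_set_congestion_control(sk, name, true), for reference:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>

int set_cc(int fd, const char *name)
{
    /* e.g. set_cc(fd, "cubic"); non-restricted CCs need no capability */
    return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
                      name, strlen(name));
}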
*/ - t = (s32)(tcp_time_stamp - ca->epoch_start); + t = (s32)(tcp_jiffies32 - ca->epoch_start); t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; @@ -448,7 +448,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) return; /* Discard delay samples right after fast recovery */ - if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = (sample->rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4af82b914dd4..ce9c7fef200f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -214,13 +214,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); - atomic_set(&req->rsk_refcnt, 2); + refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); + tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_buffer_space(child); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a4d8e76738f..3eb78cde6ff0 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; if (icsk->icsk_ca_state == TCP_CA_Open) ca->pkts_acked = sample->pkts_acked; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 174d4376baa5..2920e0cb09f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,9 +76,6 @@ #include <asm/unaligned.h> #include <linux/errqueue.h> -int sysctl_tcp_timestamps __read_mostly = 1; -int sysctl_tcp_window_scaling __read_mostly = 1; -int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; @@ -112,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -441,7 +439,7 @@ void tcp_init_buffer_space(struct sock *sk) tcp_sndbuf_expand(sk); tp->rcvq_space.space = tp->rcv_wnd; - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -463,7 +461,7 @@ void tcp_init_buffer_space(struct sock *sk) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } /* 5. Recalculate window clamp after socket hit its memory bounds. 
*/ @@ -555,11 +553,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) { u32 delta_us; - if (tp->rcv_rtt_est.time.v64 == 0) + if (tp->rcv_rtt_est.time == 0) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time); + delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); tcp_rcv_rtt_update(tp, delta_us, 1); new_measure: @@ -571,13 +569,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) - tcp_rcv_rtt_update(tp, - jiffies_to_usecs(tcp_time_stamp - - tp->rx_opt.rcv_tsecr), - 0); + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + + tcp_rcv_rtt_update(tp, delta_us, 0); + } } /* @@ -590,7 +590,7 @@ void tcp_rcv_space_adjust(struct sock *sk) int time; int copied; - time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time); + time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; @@ -672,7 +672,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_rcv_rtt_measure(tp); - now = tcp_time_stamp; + now = tcp_jiffies32; if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize @@ -885,6 +885,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric, struct tcp_sock *tp = tcp_sk(sk); int mib_idx; + if (WARN_ON_ONCE(metric < 0)) + return; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); @@ -1134,8 +1137,8 @@ struct tcp_sacktag_state { * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. 
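Throughout these hunks the two-field struct skb_mstamp is replaced by a plain u64 count of microseconds, so deltas reduce to a subtraction. A minimal sketch of what the tcp_stamp_us_delta() helper used here presumably does (max_t is the kernel macro; the clamp guards against small clock skew between CPUs producing negative deltas):

static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
{
    return max_t(s64, t1 - t0, 0);
}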
*/ - struct skb_mstamp first_sackt; - struct skb_mstamp last_sackt; + u64 first_sackt; + u64 last_sackt; struct rate_sample *rate; int flag; }; @@ -1200,7 +1203,7 @@ static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, int dup_sack, int pcount, - const struct skb_mstamp *xmit_time) + u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); int fack_count = state->fack_count; @@ -1242,9 +1245,9 @@ static u8 tcp_sacktag_one(struct sock *sk, state->reord); if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; - if (state->first_sackt.v64 == 0) - state->first_sackt = *xmit_time; - state->last_sackt = *xmit_time; + if (state->first_sackt == 0) + state->first_sackt = xmit_time; + state->last_sackt = xmit_time; } if (sacked & TCPCB_LOST) { @@ -1304,7 +1307,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - &skb->skb_mstamp); + skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) @@ -1356,8 +1359,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); - if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) - TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) + TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1587,7 +1590,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - &skb->skb_mstamp); + skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, @@ -1954,7 +1957,7 @@ void tcp_enter_loss(struct sock *sk) } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->retrans_out = 0; tp->lost_out = 0; @@ -2383,7 +2386,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) tcp_ecn_withdraw_cwr(tp); } } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; } @@ -2520,7 +2523,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } @@ -2590,7 +2593,7 @@ static void tcp_mtup_probe_success(struct sock *sk) tcp_mss_to_mtu(sk, tp->mss_cache) / icsk->icsk_mtup.probe_size; tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; @@ -2911,13 +2914,13 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) struct tcp_sock *tp = tcp_sk(sk); u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? 
: jiffies_to_usecs(1)); } -static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - long seq_rtt_us, long sack_rtt_us, - long ca_rtt_us) +static bool tcp_ack_update_rtt(struct sock *sk, const int flag, + long seq_rtt_us, long sack_rtt_us, + long ca_rtt_us, struct rate_sample *rs) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2936,9 +2939,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * See draft-ietf-tcplw-high-performance-00, section 3.3. */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - flag & FLAG_ACKED) - seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp - - tp->rx_opt.rcv_tsecr); + flag & FLAG_ACKED) { + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + + seq_rtt_us = ca_rtt_us = delta_us; + } + rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) return false; @@ -2958,16 +2965,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) { + struct rate_sample rs; long rtt_us = -1L; - if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) { - struct skb_mstamp now; + if (req && !req->num_retrans && tcp_rsk(req)->snt_synack) + rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack); - skb_mstamp_get(&now); - rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); - } - - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us); + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs); } @@ -2976,7 +2980,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); - tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; + tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32; } /* Restart timer after forward progress on connection. @@ -3001,14 +3005,14 @@ void tcp_rearm_rto(struct sock *sk) if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); - const u32 rto_time_stamp = - tcp_skb_timestamp(skb) + rto; - s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); - /* delta may not be positive if the socket is locked + u64 rto_time_stamp = skb->skb_mstamp + + jiffies_to_usecs(rto); + s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. 
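The tcp_rearm_rto() change above redoes the "remaining timeout" arithmetic in the u64 microsecond base instead of 32-bit jiffies. A sketch with illustrative names: the absolute fire time of the head skb is compared against the cached clock, falling back to a full RTO when the deadline already passed while the socket was locked:

static u32 remaining_rto(u64 head_skb_mstamp, u64 tcp_mstamp, u32 rto)
{
    u64 fire_at = head_skb_mstamp + jiffies_to_usecs(rto);
    s64 delta_us = fire_at - tcp_mstamp;

    /* delta_us <= 0: the timer expired while the socket was
     * locked and is being rescheduled; use a full RTO.
     */
    return delta_us > 0 ? usecs_to_jiffies(delta_us) : rto;
}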
*/ - if (delta > 0) - rto = delta; + if (delta_us > 0) + rto = usecs_to_jiffies(delta_us); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); @@ -3060,9 +3064,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt; + u64 first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); - struct skb_mstamp *now = &tp->tcp_mstamp; u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; bool fully_acked = true; @@ -3075,7 +3078,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, bool rtt_update; int flag = 0; - first_ackt.v64 = 0; + first_ackt = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3106,8 +3109,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; } else if (!(sacked & TCPCB_SACKED_ACKED)) { last_ackt = skb->skb_mstamp; - WARN_ON_ONCE(last_ackt.v64 == 0); - if (!first_ackt.v64) + WARN_ON_ONCE(last_ackt == 0); + if (!first_ackt) first_ackt = last_ackt; last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; @@ -3122,7 +3125,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) tcp_rack_advance(tp, sacked, scb->end_seq, - &skb->skb_mstamp); + skb->skb_mstamp); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3165,17 +3168,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); + if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { + seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); + ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); } - if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); + if (sack->first_sackt) { + sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); + ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt); } - sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, - ca_rtt_us); + ca_rtt_us, sack->rate); if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); @@ -3201,7 +3203,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { + sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. 
@@ -3211,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (icsk->icsk_ca_ops->pkts_acked) { struct ack_sample sample = { .pkts_acked = pkts_acked, - .rtt_us = ca_rtt_us, + .rtt_us = sack->rate->rtt_us, .in_flight = last_in_flight }; icsk->icsk_ca_ops->pkts_acked(sk, &sample); @@ -3390,7 +3392,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); @@ -3398,7 +3400,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_time_stamp; + *last_oow_ack_time = tcp_jiffies32; return false; /* not rate-limited: go ahead, send dupack now! */ } @@ -3553,7 +3555,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ - sack_state.first_sackt.v64 = 0; + sack_state.first_sackt = 0; sack_state.rate = &rs; /* We very likely will need to access write queue head. */ @@ -3565,7 +3567,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk, skb); + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -3636,7 +3639,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; icsk->icsk_probes_out = 0; - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) goto no_queue; @@ -3718,7 +3721,8 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, +void tcp_parse_options(const struct net *net, + const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { @@ -3759,7 +3763,7 @@ void tcp_parse_options(const struct sk_buff *skb, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && net->ipv4.sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -3775,7 +3779,7 @@ void tcp_parse_options(const struct sk_buff *skb, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && net->ipv4.sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3783,7 +3787,7 @@ void tcp_parse_options(const struct sk_buff *skb, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && net->ipv4.sysctl_tcp_sack) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -3852,7 +3856,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). 
*/ -static bool tcp_fast_parse_options(const struct sk_buff *skb, +static bool tcp_fast_parse_options(const struct net *net, + const struct sk_buff *skb, const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant @@ -3867,7 +3872,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5019,7 +5024,7 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { tcp_sndbuf_expand(sk); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } sk->sk_write_space(sk); @@ -5228,7 +5233,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, bool rst_seq_match = false; /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5356,7 +5362,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* @@ -5554,7 +5560,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; + icsk->icsk_ack.lrcvtime = tcp_jiffies32; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5565,13 +5571,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); - + tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on first data * packet. 
*/ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_init_buffer_space(sk); @@ -5599,7 +5605,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, 0, NULL); + tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5653,7 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; - tcp_parse_options(skb, &tp->rx_opt, 0, &foc); + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5672,7 +5678,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, - tcp_time_stamp)) { + tcp_time_stamp(tp))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; @@ -5917,7 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_SYN_SENT: tp->rx_opt.saw_tstamp = 0; - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5929,7 +5935,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } - skb_mstamp_get(&tp->tcp_mstamp); + tcp_mstamp_refresh(tp); tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { @@ -5948,13 +5954,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) /* step 5: check the ACK field */ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK) > 0; + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; /* send one RST */ + tcp_send_challenge_ack(sk, skb); + goto discard; + } switch (sk->sk_state) { case TCP_SYN_RECV: - if (!acceptable) - return 1; - if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -5967,6 +5977,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else { /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); + tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_mtup_init(sk); @@ -6008,7 +6019,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); @@ -6023,14 +6034,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * our SYNACK so stop the SYNACK timer. */ if (req) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; /* We no longer need the request sock. 
*/ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); @@ -6188,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req, ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || - (ecn_ok_dst & DST_FEATURE_ECN_CA)) + (ecn_ok_dst & DST_FEATURE_ECN_CA) || + tcp_bpf_ca_needs_ecn((struct sock *)req)) inet_rsk(req)->ecn_ok = 1; } @@ -6202,7 +6206,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - skb_mstamp_get(&tcp_rsk(req)->snt_synack); + tcp_rsk(req)->snt_synack = tcp_clock_us(); tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; @@ -6330,7 +6334,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, + want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -6348,7 +6353,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (tmp_opt.tstamp_ok) - tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); + tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ @@ -6403,7 +6408,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } else { tcp_rsk(req)->tfo_listener = false; if (!want_cookie) - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_hash_add(sk, req, + tcp_timeout_init((struct sock *)req)); af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? 
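The tcp_timeout_init() call above consults an attached BPF sock_ops program (op BPF_SOCK_OPS_TIMEOUT_INIT) for the initial retransmit timeout of the request socket. A hedged sketch of such a program, modeled on the samples/bpf programs from this series and built in-tree; the returned value is interpreted as jiffies, and -1 keeps the TCP_TIMEOUT_INIT default:

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"  /* SEC() macro, as in samples/bpf */

SEC("sockops")
int set_syn_rto(struct bpf_sock_ops *skops)
{
    int rv = -1;  /* -1 == keep the kernel default */

    if (skops->op == BPF_SOCK_OPS_TIMEOUT_INIT)
        rv = 10;  /* 10 jiffies, i.e. 10 ms at HZ=1000 */

    skops->reply = rv;
    return 1;
}
char _license[] SEC("license") = "GPL";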
TCP_SYNACK_NORMAL : TCP_SYNACK_COOKIE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5ab2aac5ca19..6ec6900eb300 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -80,6 +80,7 @@ #include <linux/stddef.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/inetdevice.h> #include <crypto/hash.h> #include <linux/scatterlist.h> @@ -102,10 +103,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb) tcp_hdr(skb)->source); } -static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_ts_off(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -242,7 +242,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, usin->sin_port); - tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + tp->tsoffset = secure_tcp_ts_off(sock_net(sk), + inet->inet_saddr, inet->inet_daddr); } @@ -376,8 +377,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) struct sock *sk; struct sk_buff *skb; struct request_sock *fastopen; - __u32 seq, snd_una; - __u32 remaining; + u32 seq, snd_una; + s32 remaining; + u32 delta_us; int err; struct net *net = dev_net(icmp_skb->dev); @@ -483,11 +485,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) skb = tcp_write_queue_head(sk); BUG_ON(!skb); + tcp_mstamp_refresh(tp); + delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp); remaining = icsk->icsk_rto - - min(icsk->icsk_rto, - tcp_time_stamp - tcp_skb_timestamp(skb)); + usecs_to_jiffies(delta_us); - if (remaining) { + if (remaining > 0) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); } else { @@ -811,7 +814,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp + tcptw->tw_ts_offset, + tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -839,7 +842,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp + tcp_rsk(req)->ts_off, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -904,6 +907,48 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + const struct tcp_md5sig_info *md5sig; + __be32 mask; + struct tcp_md5sig_key *best_match = NULL; + bool match; + + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + lockdep_sock_is_held(sk)); + if (!md5sig) + return NULL; + + hlist_for_each_entry_rcu(key, &md5sig->head, node) { + if (key->family != family) + continue; + + if (family == AF_INET) { + mask = inet_make_mask(key->prefixlen); + match = (key->addr.a4.s_addr & mask) == + (addr->a4.s_addr & mask); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + match = ipv6_prefix_equal(&key->addr.a6, &addr->a6, + key->prefixlen); +#endif + } else { + match = false; + } + + if (match && (!best_match || + key->prefixlen > best_match->prefixlen)) + best_match = key; + } + return best_match; +} 
+EXPORT_SYMBOL(tcp_md5_do_lookup); + +struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, + const union tcp_md5_addr *addr, + int family, u8 prefixlen) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; unsigned int size = sizeof(struct in_addr); const struct tcp_md5sig_info *md5sig; @@ -919,12 +964,12 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; - if (!memcmp(&key->addr, addr, size)) + if (!memcmp(&key->addr, addr, size) && + key->prefixlen == prefixlen) return key; } return NULL; } -EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk) @@ -938,14 +983,15 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup); /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) + int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, + gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_info *md5sig; - key = tcp_md5_do_lookup(sk, addr, family); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); if (key) { /* Pre-existing entry - just update that one. */ memcpy(key->key, newkey, newkeylen); @@ -976,6 +1022,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; key->family = family; + key->prefixlen = prefixlen; memcpy(&key->addr, addr, (family == AF_INET6) ? sizeof(struct in6_addr) : sizeof(struct in_addr)); @@ -984,11 +1031,12 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } EXPORT_SYMBOL(tcp_md5_do_add); -int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, + u8 prefixlen) { struct tcp_md5sig_key *key; - key = tcp_md5_do_lookup(sk, addr, family); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); if (!key) return -ENOENT; hlist_del_rcu(&key->node); @@ -1014,11 +1062,12 @@ static void tcp_clear_md5_list(struct sock *sk) } } -static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, - int optlen) +static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, + char __user *optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; + u8 prefixlen = 32; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1029,15 +1078,22 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; + if (optname == TCP_MD5SIG_EXT && + cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { + prefixlen = cmd.tcpm_prefixlen; + if (prefixlen > 32) + return -EINVAL; + } + if (!cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, - AF_INET); + AF_INET, prefixlen); if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, - AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, + AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } @@ -1340,7 +1396,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, * across. Shucks. 
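With tcp_md5_do_lookup() now doing longest-prefix matching and TCP_MD5SIG_EXT carrying a prefix length, one key can cover a whole subnet of peers instead of a single address. A minimal userspace sketch, assuming uapi headers that define TCP_MD5SIG_EXT and the tcpm_flags/tcpm_prefixlen fields; the key must not exceed TCP_MD5SIG_MAXKEYLEN:

#include <linux/tcp.h>  /* struct tcp_md5sig, TCP_MD5SIG_EXT */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

int md5_key_for_prefix(int fd, const char *key)
{
    struct tcp_md5sig md5;
    struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;

    memset(&md5, 0, sizeof(md5));
    sin->sin_family = AF_INET;
    sin->sin_addr.s_addr = htonl(0x0a000000);  /* 10.0.0.0 */
    md5.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX;
    md5.tcpm_prefixlen = 8;                    /* all of 10/8 */
    md5.tcpm_keylen = strlen(key);
    memcpy(md5.tcpm_key, key, md5.tcpm_keylen);

    return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG_EXT,
                      &md5, sizeof(md5));
}

Plain TCP_MD5SIG remains the exact-match case: the lookup then behaves as before, since a /32 (or /128) prefix is implied.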
*/ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, - AF_INET, key->key, key->keylen, GFP_ATOMIC); + AF_INET, 32, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1673,6 +1729,8 @@ process: } if (nsk == sk) { reqsk_put(req); + } else if (tcp_filter(sk, skb)) { + goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; @@ -1858,6 +1916,8 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_cleanup_congestion_control(sk); + tcp_cleanup_ulp(sk); + /* Cleanup up the write buffer. */ tcp_write_queue_purge(sk); @@ -2263,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -2289,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 @@ -2385,6 +2445,7 @@ struct proto tcp_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, @@ -2463,6 +2524,9 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); + net->ipv4.sysctl_tcp_sack = 1; + net->ipv4.sysctl_tcp_window_scaling = 1; + net->ipv4.sysctl_tcp_timestamps = 1; return 0; fail: diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index d6fb6c067af4..ae10ed64fe13 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -37,7 +37,7 @@ #include <net/tcp.h> /* resolution of owd */ -#define LP_RESOL 1000 +#define LP_RESOL TCP_TS_HZ /** * enum tcp_lp_state @@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) tp->rx_opt.rcv_tsecr == lp->local_ref_time) goto out; - m = HZ * (tp->rx_opt.rcv_tsval - - lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - - lp->local_ref_time); + m = TCP_TS_HZ * + (tp->rx_opt.rcv_tsval - lp->remote_ref_time) / + (tp->rx_opt.rcv_tsecr - lp->local_ref_time); if (m < 0) m = -m; @@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) if (lp->flag & LP_VALID_RHZ) { owd = tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - - tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); + tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ); if (owd < 0) owd = -owd; } @@ -264,18 +264,19 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + u32 now = tcp_time_stamp(tp); u32 delta; if (sample->rtt_us > 0) tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ - delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + delta = now - tp->rx_opt.rcv_tsecr; if ((s32)delta > 0) lp->inference = 3 * delta; /* test if within inference */ - if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) + if (lp->last_drop && (now - lp->last_drop < lp->inference)) lp->flag |= LP_WITHIN_INF; else lp->flag 
&= ~LP_WITHIN_INF; @@ -312,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); /* record this drop time */ - lp->last_drop = tcp_time_stamp; + lp->last_drop = now; } static struct tcp_congestion_ops tcp_lp __read_mostly = { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 653bbd67e3a3..102b2c90bb80 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -524,7 +524,7 @@ reset: tp->snd_cwnd = 1; else tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 717be4de5324..0ff83c1637d8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) @@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, int full_space = tcp_full_space(sk_listener); u32 window_clamp; __u8 rcv_wscale; + u32 rcv_wnd; int mss; mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); @@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req, (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; + rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req); + if (rcv_wnd == 0) + rcv_wnd = dst_metric(dst, RTAX_INITRWND); + else if (full_space < rcv_wnd * mss) + full_space = rcv_wnd * mss; + /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(full_space, mss - (ireq->tstamp_ok ? 
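tcp_rwnd_init_bpf() in the tcp_openreq_init_rwin() hunk above lets a sock_ops BPF program override the initial advertised receive window; a zero reply falls back to the RTAX_INITRWND route metric. A hedged sketch of the BPF side, again modeled on samples/bpf; the reply is in units of MSS (segments):

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"  /* SEC() macro, as in samples/bpf */

SEC("sockops")
int set_init_rwnd(struct bpf_sock_ops *skops)
{
    int rv = -1;  /* -1 == keep the kernel default */

    if (skops->op == BPF_SOCK_OPS_RWND_INIT)
        rv = 40;  /* initial window of 40 segments */

    skops->reply = rv;
    return 1;
}
char _license[] SEC("license") = "GPL";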
TCPOLEN_TSTAMP_ALIGNED : 0), @@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); + rcv_wnd); ireq->rcv_wscale = rcv_wscale; } EXPORT_SYMBOL(tcp_openreq_init_rwin); @@ -445,9 +452,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; - newicsk->icsk_ack.lrcvtime = tcp_time_stamp; + newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->packets_out = 0; newtp->retrans_out = 0; @@ -455,7 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->tlp_high_seq = 0; - newtp->lsndtime = treq->snt_synack.stamp_jiffies; + newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; @@ -526,7 +533,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; - newtp->rack.mstamp.v64 = 0; + newtp->rack.mstamp = 0; newtp->rack.advanced = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); @@ -559,7 +566,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 5de82a8d4d87..6d650ed3cb59 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -424,8 +424,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) } /* Extract info for Tcp socket info provided via netlink */ -size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, - union tcp_cc_info *info) +static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct tcpnv *ca = inet_csk_ca(sk); @@ -440,7 +440,6 @@ size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, } return 0; } -EXPORT_SYMBOL_GPL(tcpnv_get_info); static struct tcp_congestion_ops tcpnv __read_mostly = { .init = tcpnv_init, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..11f69bbf9307 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - atomic_add(sum_truesize - gso_skb->truesize, + refcount_add(sum_truesize - gso_skb->truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4858e190f6ac..4e985dea1dd2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_used = 0; } @@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - const u32 now = tcp_time_stamp; + const u32 now = tcp_jiffies32; if 
(tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); @@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; - else if (tcp_ca_needs_ecn(sk)) + else if (tcp_ca_needs_ecn(sk) || + tcp_bpf_ca_needs_ecn(sk)) INET_ECN_xmit(sk); } @@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || - tcp_ca_needs_ecn(sk); + tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); @@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); } } @@ -569,18 +571,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && !*md5)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -861,12 +863,11 @@ void tcp_wfree(struct sk_buff *skb) struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nval, oval; - int wmem; /* Keep one reference on sk_wmem_alloc. * Will be released by sk_free() from here or tcp_tasklet_func() */ - wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); /* If this softirq is serviced by ksoftirqd, we are likely under stress. * Wait until our queues (qdisc + devices) are drained. * - chance for incoming ACK (processed by another cpu maybe) * to migrate this flow (skb->ooo_okay will be eventually set) */ - if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { @@ -904,6 +905,72 @@ out: sk_free(sk); }
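/* Illustrative sketch, not part of the patch: tcp_wfree() above (and
 * tcp_pace_kick() below) defer work with a cmpxchg loop that sets a
 * QUEUED flag exactly once, so only the first context schedules the
 * deferred processing. A minimal userspace analogue using C11 atomics;
 * all names here are made up for the example.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define F_THROTTLED	(1u << 0)
#define F_QUEUED	(1u << 1)

static atomic_uint flags;	/* stands in for sk->sk_tsq_flags */

/* Returns true only for the single caller that must schedule the work. */
static bool mark_queued(void)
{
	unsigned int oval = atomic_load(&flags), nval;

	do {
		if (oval & F_QUEUED)
			return false;	/* someone else already queued it */
		nval = (oval & ~F_THROTTLED) | F_QUEUED;
	} while (!atomic_compare_exchange_weak(&flags, &oval, nval));

	return true;
}

int main(void)
{
	printf("first caller schedules: %d\n", mark_queued());	/* 1 */
	printf("second caller skips:    %d\n", mark_queued());	/* 0 */
	return 0;
}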
+/* Note: Called under hard irq. + * We cannot call into the TCP stack right away. + */ +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) +{ + struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer); + struct sock *sk = (struct sock *)tp; + unsigned long nval, oval; + + for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { + struct tsq_tasklet *tsq; + bool empty; + + if (oval & TSQF_QUEUED) + break; + + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED; + nval = cmpxchg(&sk->sk_tsq_flags, oval, nval); + if (nval != oval) + continue; + + if (!refcount_inc_not_zero(&sk->sk_wmem_alloc)) + break; + /* queue this socket to tasklet queue */ + tsq = this_cpu_ptr(&tsq_tasklet); + empty = list_empty(&tsq->head); + list_add(&tp->tsq_node, &tsq->head); + if (empty) + tasklet_schedule(&tsq->tasklet); + break; + } + return HRTIMER_NORESTART; +} + +/* BBR congestion control needs pacing. + * Same remark for SO_MAX_PACING_RATE. + * The sch_fq packet scheduler handles pacing efficiently, + * but it is not always installed/used. + * Return true if the TCP stack should pace packets itself. + */ +static bool tcp_needs_internal_pacing(const struct sock *sk) +{ + return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; +} + +static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) +{ + u64 len_ns; + u32 rate; + + if (!tcp_needs_internal_pacing(sk)) + return; + rate = sk->sk_pacing_rate; + if (!rate || rate == ~0U) + return; + + /* Should account for header sizes as sch_fq does, + * but let's keep things simple. + */ + len_ns = (u64)skb->len * NSEC_PER_SEC; + do_div(len_ns, rate); + hrtimer_start(&tcp_sk(sk)->pacing_timer, + ktime_add_ns(ktime_get(), len_ns), + HRTIMER_MODE_ABS_PINNED); +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -931,8 +998,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); + skb->skb_mstamp = tp->tcp_mstamp; if (clone_it) { - skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; tcp_rate_skb_sent(sk, skb); @@ -979,7 +1046,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -1034,6 +1101,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); tp->data_segs_out += tcp_skb_pcount(skb); + tcp_internal_pacing(sk, skb); } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) @@ -1261,9 +1329,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, return 0; }
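/* Illustrative sketch, not part of the patch: tcp_internal_pacing() above
 * spaces transmissions by len * NSEC_PER_SEC / rate nanoseconds. Worked
 * userspace arithmetic for a 1448-byte segment at a 10 MB/s pacing rate
 * (the numbers are only an example).
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t pacing_gap_ns(uint32_t len, uint32_t rate_bytes_per_sec)
{
	/* mirrors: len_ns = (u64)skb->len * NSEC_PER_SEC; do_div(len_ns, rate) */
	return (uint64_t)len * NSEC_PER_SEC / rate_bytes_per_sec;
}

int main(void)
{
	printf("%llu ns\n",
	       (unsigned long long)pacing_gap_ns(1448, 10 * 1000 * 1000));
	return 0;	/* prints 144800, i.e. ~145 us between segments */
}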
-/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c - * eventually). The difference is that pulled data not copied, but - * immediately discarded. +/* This is similar to __pskb_pull_tail(). The difference is that pulled + * data is not copied, but immediately discarded. */ static int __pskb_trim_head(struct sk_buff *skb, int len) { @@ -1298,7 +1365,6 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) } shinfo->nr_frags = k; - skb_reset_tail_pointer(skb); skb->data_len -= len; skb->len = skb->data_len; return len; @@ -1408,7 +1474,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1509,7 +1575,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) } tp->snd_cwnd_used = 0; } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) @@ -1530,14 +1596,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tcp_is_cwnd_limited(sk)) { /* Network is fed fully. */ tp->snd_cwnd_used = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } else { /* Network starves. */ if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; if (sysctl_tcp_slow_start_after_idle && - (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && + (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -1839,7 +1905,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; struct tcp_sock *tp = tcp_sk(sk); - struct skb_mstamp now; struct sk_buff *head; int win_divisor; @@ -1852,7 +1917,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* Avoid bursty behavior by allowing defer * only if the last write was recent. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1895,8 +1960,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, } head = tcp_write_queue_head(sk); - skb_mstamp_get(&now); - age = skb_mstamp_us_delta(&now, &head->skb_mstamp); + + age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); /* If next ACK is likely to come too late (half srtt), do not defer */ if (age < (tp->srtt_us >> 4)) goto send_now; @@ -1921,7 +1986,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) s32 delta; interval = net->ipv4.sysctl_tcp_probe_interval; - delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -1933,7 +1998,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); /* Update probe time stamp */ - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } } @@ -2086,6 +2151,12 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +static bool tcp_pacing_check(const struct sock *sk) +{ + return tcp_needs_internal_pacing(sk) && + hrtimer_active(&tcp_sk(sk)->pacing_timer); +} +
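/* Illustrative sketch, not part of the patch: the small-queue budget that
 * tcp_small_queue_check() below enforces is roughly max(2 skbs, ~1 ms at
 * the pacing rate), since rate >> 10 is rate / 1024, just under one
 * millisecond's worth of bytes. The sysctl cap applied by the real code is
 * omitted here; the numbers are only an example.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }

static uint32_t tsq_limit(uint32_t truesize, uint32_t pacing_rate, int factor)
{
	uint32_t limit = max_u32(2 * truesize, pacing_rate >> 10);

	return limit << factor;	/* factor doubles the budget for retransmits */
}

int main(void)
{
	/* 2 KB skbs at 100 Mbit/s (12.5 MB/s): about 1 ms worth of data */
	printf("%u bytes\n", tsq_limit(2048, 12500000, 0));	/* 12207 */
	return 0;
}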
 /* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * (These limits are doubled for retransmits) @@ -2106,7 +2177,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit <<= factor; - if (atomic_read(&sk->sk_wmem_alloc) > limit) { + if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send the 1st or 2nd skb in write queue. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. @@ -2122,7 +2193,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (refcount_read(&sk->sk_wmem_alloc) > limit) return true; } return false; @@ -2130,7 +2201,7 @@ static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { - const u32 now = tcp_time_stamp; + const u32 now = tcp_jiffies32; if (tp->chrono_type > TCP_CHRONO_UNSPEC) tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; @@ -2207,15 +2278,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); + tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; + if (tcp_pacing_check(sk)) + break; + tso_segs = tcp_init_tso_segs(skb, mss_now); BUG_ON(!tso_segs); if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tp->tcp_mstamp; goto repair; /* Skip network transmission */ } @@ -2342,10 +2417,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) timeout = max_t(u32, timeout, msecs_to_jiffies(10)); /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_time_stamp + timeout; + tlp_time_stamp = tcp_jiffies32 + timeout; rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_time_stamp; + s32 delta = rto_time_stamp - tcp_jiffies32; if (delta > 0) timeout = delta; } @@ -2738,7 +2813,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Do not send more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. */ - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; @@ -2803,7 +2878,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tp->tcp_mstamp; nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; @@ -2878,6 +2953,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; + + if (tcp_pacing_check(sk)) + break; + /* we could do better than to assign each time */ if (!hole) tp->retransmit_skb_hint = skb;
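/* Illustrative sketch, not part of the patch: the (s32)(a - b) > 0 tests
 * used above on tcp_jiffies32 values are wraparound-safe serial-number
 * comparisons; a plain a > b gives the wrong answer once the 32-bit clock
 * wraps. (The new 64-bit tcp_mstamp values, as in tcp_rack_sent_after()
 * further below, can be compared directly.)
 */
#include <stdint.h>
#include <stdio.h>

static int time_after32(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	uint32_t before_wrap = 0xfffffff0u, after_wrap = 0x10u;

	printf("naive:  %d\n", after_wrap > before_wrap);		/* 0: wrong */
	printf("serial: %d\n", time_after32(after_wrap, before_wrap));	/* 1 */
	return 0;
}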
@@ -3015,7 +3094,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); - skb_mstamp_get(&skb->skb_mstamp); + tcp_mstamp_refresh(tcp_sk(sk)); /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); @@ -3111,10 +3190,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) - skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); + skb->skb_mstamp = cookie_init_timestamp(req); else #endif - skb_mstamp_get(&skb->skb_mstamp); + skb->skb_mstamp = tcp_clock_us(); #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); @@ -3134,6 +3213,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, tcp_ecn_make_synack(req, th); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; + skb->mark = ireq->ir_mark; /* Setting of flags is superfluous here for callers (and ECE is * not even correctly set) */ @@ -3189,12 +3269,14 @@ static void tcp_connect_init(struct sock *sk) const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; + u32 rcv_wnd; /* We'll fix this up when we get a response from the other end. * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ - tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + tp->tcp_header_len = sizeof(struct tcphdr); + if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk)) @@ -3221,13 +3303,17 @@ static void tcp_connect_init(struct sock *sk) (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) tp->window_clamp = tcp_full_space(sk); + rcv_wnd = tcp_rwnd_init_bpf(sk); + if (rcv_wnd == 0) + rcv_wnd = dst_metric(dst, RTAX_INITRWND); + tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + sock_net(sk)->ipv4.sysctl_tcp_window_scaling, &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); + rcv_wnd); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; @@ -3244,11 +3330,11 @@ static void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; else - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } @@ -3361,6 +3447,7 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); tcp_connect_init(sk); if (unlikely(tp->repair)) { @@ -3373,7 +3460,8 @@ int tcp_connect(struct sock *sk) return -ENOBUFS; tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); - tp->retrans_stamp = tcp_time_stamp; + tcp_mstamp_refresh(tp); + tp->retrans_stamp = tcp_time_stamp(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); @@ -3492,7 +3580,6 @@ void tcp_send_ack(struct sock *sk) skb_set_tcp_pure_ack(buff); /* Send it off, this clears delayed acks for us. */ - skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack);
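/* Illustrative sketch, not part of the patch: tcp_connect_init() above lets
 * a BPF program override the initial receive window (tcp_rwnd_init_bpf())
 * before tcp_select_initial_window() picks a window scale. Simplified
 * userspace version of the scale selection: grow wscale until the receive
 * space fits the 16-bit window field. The real function also applies
 * clamps and per-route metrics, which are omitted here.
 */
#include <stdint.h>
#include <stdio.h>

#define TCP_MAX_WSCALE 14

static uint8_t pick_rcv_wscale(uint32_t space, int wscale_ok)
{
	uint8_t wscale = 0;

	if (!wscale_ok)
		return 0;
	while (space > 65535u && wscale < TCP_MAX_WSCALE) {
		space >>= 1;
		wscale++;
	}
	return wscale;
}

int main(void)
{
	/* 4 MB buffer: 4 MB >> 7 = 32 KB, which fits in 16 bits */
	printf("wscale: %u\n", pick_rcv_wscale(4u << 20, 1));	/* 7 */
	return 0;
}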
@@ -3526,15 +3613,16 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) * send it. */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); - skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } +/* Called from setsockopt( ... TCP_REPAIR ) */ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; + tcp_mstamp_refresh(tcp_sk(sk)); tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index c6a9fa894646..3330a370d306 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - if (!scb->tx.delivered_mstamp.v64) + if (!scb->tx.delivered_mstamp) return; if (!rs->prior_delivered || @@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ - rs->interval_us = skb_mstamp_us_delta( - &skb->skb_mstamp, - &scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp_us_delta( + skb->skb_mstamp, + scb->tx.first_tx_mstamp); /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = skb->skb_mstamp; @@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, * we don't need to reset since it'll be freed soon. */ if (scb->sacked & TCPCB_SACKED_ACKED) - scb->tx.delivered_mstamp.v64 = 0; + scb->tx.delivered_mstamp = 0; } /* Update the connection delivery information and generate a rate sample. */ @@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp.v64) { + if (!rs->prior_mstamp) { rs->delivered = -1; rs->interval_us = -1; return; @@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp, - &rs->prior_mstamp); /* ack phase */ + ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); /* Normally we expect interval_us >= min-rtt. @@ -185,3 +185,4 @@ void tcp_rate_check_app_limited(struct sock *sk) tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?
: 1; } +EXPORT_SYMBOL_GPL(tcp_rate_check_app_limited); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 362b8c75bfab..fe9a493d0208 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } -static bool tcp_rack_sent_after(const struct skb_mstamp *t1, - const struct skb_mstamp *t2, - u32 seq1, u32 seq2) +static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { - return skb_mstamp_after(t1, t2) || - (t1->v64 == t2->v64 && after(seq1, seq2)); + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): @@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) scb->sacked & TCPCB_SACKED_ACKED) continue; - if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp, + if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, tp->rack.end_seq, scb->end_seq)) { /* Step 3 in draft-cheng-tcpm-rack-00.txt: * A packet is lost if its elapsed time is beyond * the recent RTT plus the reordering window. */ - u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp, - &skb->skb_mstamp); + u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp, + skb->skb_mstamp); s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; if (remaining < 0) { @@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk) * draft-cheng-tcpm-rack-00.txt */ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time) + u64 xmit_time) { u32 rtt_us; - if (tp->rack.mstamp.v64 && - !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp, + if (tp->rack.mstamp && + !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, end_seq, tp->rack.end_seq)) return; - rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time); + rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); if (sacked & TCPCB_RETRANS) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior @@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, return; } tp->rack.rtt_us = rtt_us; - tp->rack.mstamp = *xmit_time; + tp->rack.mstamp = xmit_time; tp->rack.end_seq = end_seq; tp->rack.advanced = 1; } @@ -166,7 +163,6 @@ void tcp_rack_reo_timeout(struct sock *sk) u32 timeout, prior_inflight; prior_inflight = tcp_packets_in_flight(tp); - skb_mstamp_get(&tp->tcp_mstamp); tcp_rack_detect_loss(sk, &timeout); if (prior_inflight != tcp_packets_in_flight(tp)) { if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 14672543cf0b..c0feeeef962a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -63,7 +63,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If the peer does not open its window for a long time, or did not * transmit anything for a long time, penalize it. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ @@ -73,7 +73,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || + if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed.
*/ (!tp->snd_wnd && !tp->packets_out)) do_reset = true; @@ -115,7 +115,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); @@ -139,22 +139,18 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * @timeout: A custom timeout value. * If set to 0 the default timeout is calculated and used. * Using TCP_RTO_MIN and the number of unsuccessful retransmits. - * @syn_set: true if the SYN Bit was set. * * The default "timeout" value this function can calculate and use * is equivalent to the timeout of a TCP connection * after "boundary" unsuccessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if - * syn_set flag is set. - * + * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, - unsigned int timeout, - bool syn_set) + unsigned int timeout) { + const unsigned int rto_base = TCP_RTO_MIN; unsigned int linear_backoff_thresh, start_ts; - unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -172,7 +168,7 @@ static bool retransmits_timed_out(struct sock *sk, timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; } - return (tcp_time_stamp - start_ts) >= timeout; + return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout); } /* A write timeout has occurred. Process the after effects. */ @@ -181,8 +177,8 @@ static int tcp_write_timeout(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); + bool expired, do_reset; int retry_until; - bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) { @@ -196,9 +192,9 @@ static int tcp_write_timeout(struct sock *sk) sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; - syn_set = true; + expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { /* Some middle-boxes may black-hole Fast Open _after_ * the handshake. Therefore we conservatively disable * Fast Open on this path on recurring timeouts after @@ -224,15 +220,15 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until, 0, 0); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } + expired = retransmits_timed_out(sk, retry_until, + icsk->icsk_user_timeout); }
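/* Illustrative sketch, not part of the patch: retransmits_timed_out() above
 * models "boundary" doublings of the initial RTO, switching to linear
 * TCP_RTO_MAX steps once a doubled RTO would exceed TCP_RTO_MAX. Worked in
 * milliseconds, assuming the usual TCP_RTO_MIN = 200 ms and
 * TCP_RTO_MAX = 120 s.
 */
#include <stdio.h>

#define RTO_MIN_MS 200u
#define RTO_MAX_MS 120000u

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int backoff_timeout_ms(unsigned int boundary)
{
	unsigned int thresh = ilog2_u32(RTO_MAX_MS / RTO_MIN_MS);	/* 9 */

	if (boundary <= thresh)
		return ((2u << boundary) - 1) * RTO_MIN_MS;
	return ((2u << thresh) - 1) * RTO_MIN_MS +
	       (boundary - thresh) * RTO_MAX_MS;
}

int main(void)
{
	printf("%u ms\n", backoff_timeout_ms(3));	/* 3000: 200+400+800+1600 */
	printf("%u ms\n", backoff_timeout_ms(15));	/* 924600 */
	return 0;
}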
- - if (retransmits_timed_out(sk, retry_until, - syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) { + if (expired) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; } return 0; } @@ -339,9 +335,10 @@ static void tcp_probe_timer(struct sock *sk) */ start_ts = tcp_skb_timestamp(tcp_send_head(sk)); if (!start_ts) - skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp); + tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; else if (icsk->icsk_user_timeout && - (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) + (s32)(tcp_time_stamp(tp) - start_ts) > + jiffies_to_msecs(icsk->icsk_user_timeout)) goto abort; max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; @@ -451,7 +448,7 @@ void tcp_retransmit_timer(struct sock *sk) tp->snd_una, tp->snd_nxt); } #endif - if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { + if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); goto out; } @@ -539,7 +536,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0)) + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -561,6 +558,7 @@ void tcp_write_timer_handler(struct sock *sk) goto out; } + tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; switch (event) { @@ -710,4 +708,7 @@ void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); + hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED); + tcp_sk(sk)->pacing_timer.function = tcp_pace_kick; } diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c new file mode 100644 index 000000000000..2417f55374c5 --- /dev/null +++ b/net/ipv4/tcp_ulp.c @@ -0,0 +1,135 @@ +/* + * Pluggable TCP upper layer protocol support. + * + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/gfp.h> +#include <net/tcp.h> + +static DEFINE_SPINLOCK(tcp_ulp_list_lock); +static LIST_HEAD(tcp_ulp_list); + +/* Simple linear search, don't expect many entries! */ +static struct tcp_ulp_ops *tcp_ulp_find(const char *name) +{ + struct tcp_ulp_ops *e; + + list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + if (strcmp(e->name, name) == 0) + return e; + } + + return NULL; +} + +static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) +{ + const struct tcp_ulp_ops *ulp = NULL; + + rcu_read_lock(); + ulp = tcp_ulp_find(name); + +#ifdef CONFIG_MODULES + if (!ulp && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("%s", name); + rcu_read_lock(); + ulp = tcp_ulp_find(name); + } +#endif + if (!ulp || !try_module_get(ulp->owner)) + ulp = NULL; + + rcu_read_unlock(); + return ulp; } + +/* Attach new upper layer protocol to the list * of available protocols. */ +int tcp_register_ulp(struct tcp_ulp_ops *ulp) +{ + int ret = 0; + + spin_lock(&tcp_ulp_list_lock); + if (tcp_ulp_find(ulp->name)) { + pr_notice("%s already registered or non-unique name\n", + ulp->name); + ret = -EEXIST; + } else { + list_add_tail_rcu(&ulp->list, &tcp_ulp_list); + } + spin_unlock(&tcp_ulp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_register_ulp); + +void tcp_unregister_ulp(struct tcp_ulp_ops *ulp) +{ + spin_lock(&tcp_ulp_list_lock); + list_del_rcu(&ulp->list); + spin_unlock(&tcp_ulp_list_lock); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(tcp_unregister_ulp); +
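/* Illustrative sketch, not part of the patch: a minimal consumer of the ULP
 * registry above, modeled on how an upper layer protocol such as kernel TLS
 * would hook in. "demo_ulp" and both callbacks are hypothetical; only
 * tcp_register_ulp()/tcp_unregister_ulp() and the tcp_ulp_ops fields come
 * from the code above.
 */
#include <linux/module.h>
#include <net/tcp.h>

static int demo_ulp_init(struct sock *sk)
{
	/* set up per-socket state; called via setsockopt(TCP_ULP) */
	return 0;
}

static void demo_ulp_release(struct sock *sk)
{
	/* undo demo_ulp_init(); called from tcp_cleanup_ulp() at destroy */
}

static struct tcp_ulp_ops demo_ulp_ops __read_mostly = {
	.name		= "demo_ulp",
	.owner		= THIS_MODULE,
	.init		= demo_ulp_init,
	.release	= demo_ulp_release,
};

static int __init demo_ulp_register(void)
{
	return tcp_register_ulp(&demo_ulp_ops);
}

static void __exit demo_ulp_unregister(void)
{
	tcp_unregister_ulp(&demo_ulp_ops);
}

module_init(demo_ulp_register);
module_exit(demo_ulp_unregister);
MODULE_LICENSE("GPL");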
+/* Build string with list of available upper layer protocol values */ +void tcp_get_available_ulp(char *buf, size_t maxlen) +{ + struct tcp_ulp_ops *ulp_ops; + size_t offs = 0; + + *buf = '\0'; + rcu_read_lock(); + list_for_each_entry_rcu(ulp_ops, &tcp_ulp_list, list) { + offs += snprintf(buf + offs, maxlen - offs, + "%s%s", + offs == 0 ? "" : " ", ulp_ops->name); + } + rcu_read_unlock(); +} + +void tcp_cleanup_ulp(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ulp_ops) + return; + + if (icsk->icsk_ulp_ops->release) + icsk->icsk_ulp_ops->release(sk); + module_put(icsk->icsk_ulp_ops->owner); +} + +/* Change upper layer protocol for socket */ +int tcp_set_ulp(struct sock *sk, const char *name) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; + int err; + + if (icsk->icsk_ulp_ops) + return -EEXIST; + + ulp_ops = __tcp_ulp_find_autoload(name); + if (!ulp_ops) + return -ENOENT; + + err = ulp_ops->init(sk); + if (err) { + /* drop the module reference taken by the autoload lookup */ + module_put(ulp_ops->owner); + return err; + } + + icsk->icsk_ulp_ops = ulp_ops; + return 0; +} diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 9775453b8d17..bec9cafbe3f9 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -68,7 +68,7 @@ static void tcp_westwood_init(struct sock *sk) w->cumul_ack = 0; w->reset_rtt_min = 1; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_win_sx = tcp_jiffies32; w->snd_una = tcp_sk(sk)->snd_una; w->first_ack = 1; } @@ -116,7 +116,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk, static void westwood_update_window(struct sock *sk) { struct westwood *w = inet_csk_ca(sk); - s32 delta = tcp_time_stamp - w->rtt_win_sx; + s32 delta = tcp_jiffies32 - w->rtt_win_sx; /* Initialize w->snd_una with the first acked sequence number in order * to fix mismatch between tp->snd_una and w->snd_una for the first @@ -140,7 +140,7 @@ static void westwood_update_window(struct sock *sk) westwood_filter(w, delta); w->bk = 0; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_win_sx = tcp_jiffies32; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1d6219bf2d6b..25294d43e147 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -1163,23 +1163,62 @@ out: return ret; }
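/* Illustrative sketch, not part of the patch: udp_set_dev_scratch() below
 * caches a few skb fields in the skb->dev_scratch word so the consumer can
 * avoid touching cold skb cache lines under the queue lock. On 64-bit this
 * packs several fields into one long, enforced by a BUILD_BUG_ON(). A
 * userspace illustration of the same size constraint, with a layout that
 * mirrors (but is not) the kernel's udp_dev_scratch:
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct scratch {			/* must fit in one unsigned long */
	uint32_t truesize;
	uint16_t len;
	bool csum_unnecessary;
	bool is_linear;
};

int main(void)
{
	unsigned long word = 0;		/* stands in for skb->dev_scratch */
	struct scratch in = { .truesize = 2048, .len = 1472,
			      .csum_unnecessary = true, .is_linear = true };
	struct scratch out;

	/* the equivalent of the kernel's BUILD_BUG_ON(); fails on 32-bit */
	static_assert(sizeof(struct scratch) <= sizeof(unsigned long),
		      "scratch area must fit a long");
	memcpy(&word, &in, sizeof(in));		/* udp_set_dev_scratch() */
	memcpy(&out, &word, sizeof(out));	/* udp_skb_truesize() etc. */
	printf("truesize=%u len=%u\n", out.truesize, out.len);
	return 0;
}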
+#if BITS_PER_LONG == 64 +static void udp_set_dev_scratch(struct sk_buff *skb) +{ + struct udp_dev_scratch *scratch; + + BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); + scratch = (struct udp_dev_scratch *)&skb->dev_scratch; + scratch->truesize = skb->truesize; + scratch->len = skb->len; + scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); + scratch->is_linear = !skb_is_nonlinear(skb); +} + +static int udp_skb_truesize(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; +} +#else +static void udp_set_dev_scratch(struct sk_buff *skb) +{ + skb->dev_scratch = skb->truesize; +} + +static int udp_skb_truesize(struct sk_buff *skb) +{ + return skb->dev_scratch; +} +#endif + /* fully reclaim rmem/fwd memory allocated for skb */ -static void udp_rmem_release(struct sock *sk, int size, int partial) +static void udp_rmem_release(struct sock *sk, int size, int partial, + bool rx_queue_lock_held) { struct udp_sock *up = udp_sk(sk); + struct sk_buff_head *sk_queue; int amt; if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; if (size < (sk->sk_rcvbuf >> 2) && - !skb_queue_empty(&sk->sk_receive_queue)) + !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit; } up->forward_deficit = 0; + /* acquire the sk_receive_queue lock for fwd allocated memory scheduling, + * if the caller doesn't hold it already + */ + sk_queue = &sk->sk_receive_queue; + if (!rx_queue_lock_held) + spin_lock(&sk_queue->lock); + + sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; @@ -1188,19 +1227,33 @@ static void udp_rmem_release(struct sock *sk, int size, int partial) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); atomic_sub(size, &sk->sk_rmem_alloc); + + /* this can save us from acquiring the rx queue lock on next receive */ + skb_queue_splice_tail_init(sk_queue, &up->reader_queue); + + if (!rx_queue_lock_held) + spin_unlock(&sk_queue->lock); } -/* Note: called with sk_receive_queue.lock held. +/* Note: called with reader_queue.lock held. * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch * This avoids a cache line miss while receive_queue lock is held. * Look at __udp_enqueue_schedule_skb() to find where this copy is done. */ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(sk, skb->dev_scratch, 1); + prefetch(&skb->data); + udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } EXPORT_SYMBOL(udp_skb_destructor); + +/* as above, but the caller also holds the rx queue lock */ +static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) +{ + prefetch(&skb->data); + udp_rmem_release(sk, udp_skb_truesize(skb), 1, true); +} + /* Idea of busylocks is to let producers grab an extra spinlock * to relieve pressure on the receive_queue spinlock shared by the consumer.
* Under flood, this means that only one producer can be in line @@ -1252,10 +1305,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) busy = busylock_acquire(sk); } size = skb->truesize; - /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss - * in udp_skb_destructor() - */ - skb->dev_scratch = size; + udp_set_dev_scratch(skb); /* we drop only if the receive buf is full and the receive * queue contains some other skb @@ -1306,14 +1356,16 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ + struct udp_sock *up = udp_sk(sk); unsigned int total = 0; struct sk_buff *skb; - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue); + while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) { total += skb->truesize; kfree_skb(skb); } - udp_rmem_release(sk, total, 0); + udp_rmem_release(sk, total, 0, true); inet_sock_destruct(sk); } @@ -1321,6 +1373,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { + skb_queue_head_init(&udp_sk(sk)->reader_queue); sk->sk_destruct = udp_destruct_sock; return 0; } @@ -1334,10 +1387,38 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) sk_peek_offset_bwd(sk, len); unlock_sock_fast(sk, slow); } - consume_skb(skb); + + consume_stateless_skb(skb); } EXPORT_SYMBOL_GPL(skb_consume_udp); +static struct sk_buff *__first_packet_length(struct sock *sk, + struct sk_buff_head *rcvq, + int *total) +{ + struct sk_buff *skb; + + while ((skb = skb_peek(rcvq)) != NULL) { + if (udp_lib_checksum_complete(skb)) { + __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); + __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + atomic_inc(&sk->sk_drops); + __skb_unlink(skb, rcvq); + *total += skb->truesize; + kfree_skb(skb); + } else { + /* the csum related bits could be changed, refresh + * the scratch area + */ + udp_set_dev_scratch(skb); + break; + } + } + return skb; +} + /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1347,26 +1428,24 @@ EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue; + struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff *skb; int total = 0; int res; spin_lock_bh(&rcvq->lock); - while ((skb = skb_peek(rcvq)) != NULL && - udp_lib_checksum_complete(skb)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - atomic_inc(&sk->sk_drops); - __skb_unlink(skb, rcvq); - total += skb->truesize; - kfree_skb(skb); + skb = __first_packet_length(sk, rcvq, &total); + if (!skb && !skb_queue_empty(sk_queue)) { + spin_lock(&sk_queue->lock); + skb_queue_splice_tail_init(sk_queue, rcvq); + spin_unlock(&sk_queue->lock); + + skb = __first_packet_length(sk, rcvq, &total); } res = skb ? 
skb->len : -1; if (total) - udp_rmem_release(sk, total, 1); + udp_rmem_release(sk, total, 1, false); spin_unlock_bh(&rcvq->lock); return res; } @@ -1400,6 +1479,77 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(udp_ioctl); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *peeked, int *off, int *err) +{ + struct sk_buff_head *sk_queue = &sk->sk_receive_queue; + struct sk_buff_head *queue; + struct sk_buff *last; + long timeo; + int error; + + queue = &udp_sk(sk)->reader_queue; + flags |= noblock ? MSG_DONTWAIT : 0; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + do { + struct sk_buff *skb; + + error = sock_error(sk); + if (error) + break; + + error = -EAGAIN; + *peeked = 0; + do { + spin_lock_bh(&queue->lock); + skb = __skb_try_recv_from_queue(sk, queue, flags, + udp_skb_destructor, + peeked, off, err, + &last); + if (skb) { + spin_unlock_bh(&queue->lock); + return skb; + } + + if (skb_queue_empty(sk_queue)) { + spin_unlock_bh(&queue->lock); + goto busy_check; + } + + /* refill the reader queue and walk it again + * keep both queues locked to avoid re-acquiring + * the sk_receive_queue lock if fwd memory scheduling + * is needed. + */ + spin_lock(&sk_queue->lock); + skb_queue_splice_tail_init(sk_queue, queue); + + skb = __skb_try_recv_from_queue(sk, queue, flags, + udp_skb_dtor_locked, + peeked, off, err, + &last); + spin_unlock(&sk_queue->lock); + spin_unlock_bh(&queue->lock); + if (skb) + return skb; + +busy_check: + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(sk_queue)); + + /* sk_queue is empty, reader_queue may contain peeked packets */ + } while (timeo && + !__skb_wait_for_more_packets(sk, &error, &timeo, + (struct sk_buff *)sk_queue)); + + *err = error; + return NULL; +} +EXPORT_SYMBOL_GPL(__skb_recv_udp); + /* * This should be easy, if there is something there we * return it, otherwise we block. 
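 */

/* Illustrative sketch, not part of the patch: __skb_recv_udp() above drains
 * a consumer-private reader queue and takes the producer-shared queue lock
 * only to splice the whole backlog across in one operation. A single-consumer
 * userspace analogue of that two-queue pattern (LIFO splice for brevity,
 * unlike the kernel's order-preserving splice; all names invented):
 */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct node { struct node *next; };

struct two_queue {
	pthread_mutex_t lock;	/* producers contend only on this */
	struct node *shared;	/* producer side */
	struct node *reader;	/* consumer-private, no lock needed */
};

static void tq_push(struct two_queue *q, struct node *n)
{
	pthread_mutex_lock(&q->lock);
	n->next = q->shared;
	q->shared = n;
	pthread_mutex_unlock(&q->lock);
}

static struct node *tq_pop(struct two_queue *q)
{
	struct node *n = q->reader;

	if (!n) {
		/* one lock acquisition moves the whole producer backlog */
		pthread_mutex_lock(&q->lock);
		q->reader = q->shared;
		q->shared = NULL;
		pthread_mutex_unlock(&q->lock);
		n = q->reader;
	}
	if (n)
		q->reader = n->next;
	return n;
}

int main(void)
{
	struct two_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct node a, b;

	tq_push(&q, &a);
	tq_push(&q, &b);
	printf("%s\n", tq_pop(&q) == &b ? "got b" : "?");
	printf("%s\n", tq_pop(&q) == &a ? "got a" : "?");
	return 0;
}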
@@ -1426,7 +1576,7 @@ try_again: if (!skb) return err; - ulen = skb->len; + ulen = udp_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; @@ -1441,14 +1591,18 @@ try_again: if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { - checksum_valid = !udp_lib_checksum_complete(skb); + checksum_valid = udp_skb_csum_unnecessary(skb) || + !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } - if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, off, msg, copied); - else { + if (checksum_valid || udp_skb_csum_unnecessary(skb)) { + if (udp_skb_is_linear(skb)) + err = copy_linear_skb(skb, copied, off, &msg->msg_iter); + else + err = skb_copy_datagram_msg(skb, off, msg, copied); + } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) @@ -1490,7 +1644,8 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { + if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, + udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } @@ -1624,6 +1779,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } + /* clear all pending head states while they are hot in the cache */ + skb_release_head_state(skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1738,6 +1896,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } + prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; @@ -1766,9 +1925,10 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; - dst_hold(dst); - old = xchg(&sk->sk_rx_dst, dst); - dst_release(old); + if (dst_hold_safe(dst)) { + old = xchg(&sk->sk_rx_dst, dst); + dst_release(old); + } } /* @@ -2082,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb) uh->source, iph->saddr, dif); } - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; @@ -2092,13 +2252,11 @@ void udp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst) { - /* DST_NOCACHE can not be used without taking a reference */ - if (dst->flags & DST_NOCACHE) { - if (likely(atomic_inc_not_zero(&dst->__refcnt))) - skb_dst_set(skb, dst); - } else { - skb_dst_set_noref(skb, dst); - } + /* set noref for now. 
+ * any place which wants to hold dst has to call + * dst_hold_safe() + */ + skb_dst_set_noref(skb, dst); } } @@ -2325,6 +2483,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + mask |= POLLIN | POLLRDNORM; + sock_rps_record_flow(sk); /* Check for false positives due to checksum errors */ @@ -2530,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 9a89c10a55f0..4515836d2a3a 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); err = -ENOENT; @@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, return -EINVAL; } - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 71acd0014f2d..856d2dfdb44b 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -57,8 +57,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) xfrm4_beet_make_header(skb); - ph = (struct ip_beet_phdr *) - __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); top_iph = ip_hdr(skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1d2dbace42ff..3c46e9513a31 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -426,7 +426,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) } /* One reference from device. 
*/ - in6_dev_hold(ndev); + refcount_set(&ndev->refcnt, 1); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) ndev->cnf.accept_dad = -1; @@ -963,6 +963,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; + struct in6_validator_info i6vi; unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -974,6 +975,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, return ERR_PTR(-EADDRNOTAVAIL); rcu_read_lock_bh(); + + in6_dev_hold(idev); + if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -984,6 +988,17 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out2; } + i6vi.i6vi_addr = *addr; + i6vi.i6vi_dev = idev; + rcu_read_unlock_bh(); + + err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); + + rcu_read_lock_bh(); + err = notifier_to_errno(err); + if (err) + goto out2; + spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ @@ -1034,9 +1049,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->rt = rt; ifa->idev = idev; - in6_dev_hold(idev); /* For caller */ - in6_ifa_hold(ifa); + refcount_set(&ifa->refcnt, 1); /* Add to big hash table */ hash = inet6_addr_hash(addr); @@ -1062,6 +1076,7 @@ out2: inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); + in6_dev_put(idev); ifa = ERR_PTR(err); } @@ -1912,15 +1927,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; - if (ifp->flags&IFA_F_PERMANENT) { - spin_lock_bh(&ifp->lock); - addrconf_del_dad_work(ifp); - ifp->flags |= IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - if (dad_failed) - ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); - } else if (ifp->flags&IFA_F_TEMPORARY) { + if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); ifpub = ifp->ifpub; @@ -1933,6 +1940,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); + } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) { + spin_lock_bh(&ifp->lock); + addrconf_del_dad_work(ifp); + ifp->flags |= IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); } else { ipv6_del_addr(ifp); } @@ -2280,7 +2295,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, cfg.fc_flags |= RTF_NONEXTHOP; #endif - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); } @@ -2335,7 +2350,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -5562,8 +5577,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_del_rt(rt); } if (ifp->rt) { - dst_hold(&ifp->rt->dst); - ip6_del_rt(ifp->rt); + if (dst_hold_safe(&ifp->rt->dst)) + ip6_del_rt(ifp->rt); } rt_genid_bump_ipv6(net); break; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index bfa941fc1165..9e3488d50b15 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -88,6 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); +static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { @@ -107,6 +108,24 @@ 
int inet6addr_notifier_call_chain(unsigned long val, void *v) } EXPORT_SYMBOL(inet6addr_notifier_call_chain); +int register_inet6addr_validator_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_validator_notifier); + +int unregister_inet6addr_validator_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); + +int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); + static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, struct dst_entry **u2, struct flowi6 *u3) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 07cd7d248bb6..7a428f65c7ec 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -18,6 +18,7 @@ #include <linux/if_addrlabel.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> +#include <linux/refcount.h> #if 0 #define ADDRLABEL(x...) printk(x) @@ -36,7 +37,7 @@ struct ip6addrlbl_entry { int addrtype; u32 label; struct hlist_node list; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu; }; @@ -137,12 +138,12 @@ static void ip6addrlbl_free_rcu(struct rcu_head *h) static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) { - return atomic_inc_not_zero(&p->refcnt); + return refcount_inc_not_zero(&p->refcnt); } static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) { - if (atomic_dec_and_test(&p->refcnt)) + if (refcount_dec_and_test(&p->refcnt)) call_rcu(&p->rcu, ip6addrlbl_free_rcu); } @@ -236,7 +237,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->label = label; INIT_HLIST_NODE(&newp->list); write_pnet(&newp->lbl_net, net); - atomic_set(&newp->refcnt, 1); + refcount_set(&newp->refcnt, 1); return newp; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index dda6035e3b84..7802b72196f3 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -25,6 +25,7 @@ #define pr_fmt(fmt) "IPv6: " fmt +#include <crypto/algapi.h> #include <crypto/hash.h> #include <linux/module.h> #include <linux/slab.h> @@ -423,7 +424,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -481,7 +484,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err) auth_data = ah_tmp_auth(work_iph, hdr_len); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; + err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG : 0; if (err) goto out; @@ -606,7 +609,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->hop_limit = 0; sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -627,7 +632,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) goto out_free; } - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; + err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out_free; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 514ac259f543..0bbab8a4b5d8 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -203,12 +203,12 @@ void ipv6_sock_ac_close(struct sock *sk) static void aca_get(struct ifacaddr6 *aca) { - atomic_inc(&aca->aca_refcnt); + refcount_inc(&aca->aca_refcnt); } static void aca_put(struct ifacaddr6 *ac) { - if (atomic_dec_and_test(&ac->aca_refcnt)) { + if (refcount_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); dst_release(&ac->aca_rt->dst); kfree(ac); @@ -232,7 +232,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; - atomic_set(&aca->aca_refcnt, 1); + refcount_set(&aca->aca_refcnt, 1); return aca; } diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 8d772fea1dde..1323b9679cf7 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -227,7 +227,7 @@ static int calipso_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - atomic_inc(&entry->lsm_data->refcount); + refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CALIPSO; @@ -296,7 +296,7 @@ static int calipso_cache_add(const unsigned char *calipso_ptr, } entry->key_len = calipso_ptr_len; entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len); - atomic_inc(&secattr->cache->refcount); + refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1); @@ -338,7 +338,7 @@ static struct calipso_doi *calipso_doi_search(u32 doi) struct calipso_doi *iter; list_for_each_entry_rcu(iter, &calipso_doi_list, list) - if (iter->doi == doi && atomic_read(&iter->refcount)) + if (iter->doi == doi && refcount_read(&iter->refcount)) return iter; return NULL; } @@ -370,7 +370,7 @@ static int calipso_doi_add(struct calipso_doi *doi_def, if (doi_def->doi == CALIPSO_DOI_UNKNOWN) goto doi_add_return; - atomic_set(&doi_def->refcount, 1); + refcount_set(&doi_def->refcount, 1); spin_lock(&calipso_doi_list_lock); if (calipso_doi_search(doi_def->doi)) { @@ -458,7 +458,7 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!atomic_dec_and_test(&doi_def->refcount)) { + if (!refcount_dec_and_test(&doi_def->refcount)) { spin_unlock(&calipso_doi_list_lock); ret_val = -EBUSY; goto doi_remove_return; @@ -499,7 +499,7 @@ static struct calipso_doi *calipso_doi_getdef(u32 doi) doi_def = calipso_doi_search(doi); if (!doi_def) goto doi_getdef_return; - if (!atomic_inc_not_zero(&doi_def->refcount)) + if (!refcount_inc_not_zero(&doi_def->refcount)) doi_def = NULL; doi_getdef_return: @@ -520,7 +520,7 @@ static void 
calipso_doi_putdef(struct calipso_doi *doi_def) if (!doi_def) return; - if (!atomic_dec_and_test(&doi_def->refcount)) + if (!refcount_dec_and_test(&doi_def->refcount)) return; spin_lock(&calipso_doi_list_lock); list_del_rcu(&doi_def->list); @@ -553,7 +553,7 @@ static int calipso_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &calipso_doi_list, list) - if (atomic_read(&iter_doi->refcount) > 0) { + if (refcount_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c786f5ab961..a1f918713006 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -1041,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1fe99ba8066c..9ed35473dcb5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -118,7 +118,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { - __be32 *seqhi; struct crypto_aead *aead = x->data; int seqhilen = 0; u8 *iv; @@ -128,7 +127,6 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) if (x->props.flags & XFRM_STATE_ESN) seqhilen += sizeof(__be32); - seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); @@ -224,12 +222,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info u8 *vaddr; int nfrags; struct page *page; - struct ip_esp_hdr *esph; struct sk_buff *trailer; int tailen = esp->tailen; - esph = ip_esp_hdr(skb); - if (!skb_cloned(skb)) { if (tailen <= skb_availroom(skb)) { nfrags = 1; @@ -280,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } @@ -346,9 +341,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); sg_init_table(sg, esp->nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; if (!esp->inplace) { int allocsize; @@ -372,9 +369,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info spin_unlock_bh(&x->lock); sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + esp->clen + alen); + err = skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + if (unlikely(err < 0)) + goto error; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -534,7 +533,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) * decryption. 
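
The skb_to_sgvec()/skb_to_sgvec_nomark() call sites in esp6_output_tail() above (and in ah6 earlier) all grow an error check because the functions now return a negative errno when the skb's fragment layout overflows the scatterlist, instead of silently writing past it. A sketch of the hardened pattern, assuming nfrags came from skb_cow_data():

#include <linux/skbuff.h>
#include <linux/scatterlist.h>

static int map_skb_for_crypto(struct sk_buff *skb, struct scatterlist *sg,
                              int nfrags)
{
        int err;

        sg_init_table(sg, nfrags);
        err = skb_to_sgvec(skb, sg, 0, skb->len);
        if (unlikely(err < 0))
                return err;     /* sg must not be handed to the crypto layer */
        return 0;
}
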
*/ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); + esph = skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; @@ -618,7 +617,9 @@ skip_cow: esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) + goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index b636f1da9aec..4996d734f1d2 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -847,7 +847,7 @@ static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, ihdr = (struct rt0_hdr *) opt; - phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); + phdr = skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); memcpy(phdr, ihdr, sizeof(struct rt0_hdr)); hops = ihdr->rt_hdr.hdrlen >> 1; @@ -873,7 +873,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, sr_ihdr = (struct ipv6_sr_hdr *)opt; plen = (sr_ihdr->hdrlen + 1) << 3; - sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen); + sr_phdr = skb_push(skb, plen); memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); hops = sr_ihdr->first_segment + 1; @@ -923,7 +923,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { - struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); + struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt)); memcpy(h, opt, ipv6_optlen(opt)); h->nexthdr = *proto; @@ -971,7 +971,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; - atomic_set(&opt2->refcnt, 1); + refcount_set(&opt2->refcnt, 1); } return opt2; } @@ -1056,7 +1056,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - atomic_set(&opt2->refcnt, 1); + refcount_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 9ea249b9451e..6de3c04b0f30 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -14,6 +14,8 @@ #include <net/udp.h> #include <net/udp_tunnel.h> +#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) + static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, struct flowi6 *fl6, u8 *protocol, __be16 sport) { @@ -33,8 +35,8 @@ static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, *protocol = IPPROTO_UDP; } -int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi6 *fl6) +static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; @@ -49,10 +51,9 @@ int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(fou6_build_header); -int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi6 *fl6) +static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; @@ -67,9 +68,6 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } -EXPORT_SYMBOL(gue6_build_header); - -#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c 
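
The cast removals above (esp6, exthdrs; more follow in ip6_gre, ndisc, SYNPROXY) come from skb_push()/skb_put()/__skb_push() being changed to return void * this cycle: a void pointer converts implicitly to any object pointer in C, and the compiler still type-checks every later use. A minimal sketch, assuming the skb was allocated with enough headroom:

#include <linux/skbuff.h>
#include <linux/string.h>
#include <net/ipv6.h>

static void push_opt_hdr(struct sk_buff *skb, const struct ipv6_opt_hdr *opt)
{
        struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt));  /* no cast */

        memcpy(h, opt, ipv6_optlen(opt));
}
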
index b3df03e3faa0..0c02a09bc351 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -91,7 +91,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) drop: kfree_skb(skb); - return -EINVAL; + return err; } static int ila_input(struct sk_buff *skb) @@ -117,7 +117,8 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { static int ila_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct ila_lwt *ilwt; struct ila_params *p; @@ -146,7 +147,7 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL); + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d0900918a19e..b13b8f93079d 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -75,7 +75,7 @@ begin: continue; if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) continue; - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { @@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e6b78ba0e636..5477ba729c36 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -153,11 +153,6 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } -static void rt6_rcu_free(struct rt6_info *rt) -{ - call_rcu(&rt->dst.rcu_head, dst_rcu_free); -} - static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -172,7 +167,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); pcpu_rt = *ppcpu_rt; if (pcpu_rt) { - rt6_rcu_free(pcpu_rt); + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); *ppcpu_rt = NULL; } } @@ -185,7 +181,8 @@ static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) { rt6_free_pcpu(rt); - rt6_rcu_free(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); } } @@ -472,7 +469,8 @@ out: static struct fib6_node *fib6_add_1(struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, - int replace_required, int sernum) + int replace_required, int sernum, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -496,6 +494,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { if (!allow_create) { if (replace_required) { + NL_SET_ERR_MSG(extack, + "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } @@ -542,6 +542,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, * That would keep IPv6 consistent with IPv4 */ if (replace_required) { + NL_SET_ERR_MSG(extack, + "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } @@ -963,7 +965,8 @@ void fib6_force_start_gc(struct net 
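
The ila and ip6_fib hunks here are part of the extended-netlink-ACK plumbing: fib6_add_1() and the lwtunnel build_state hooks now take a struct netlink_ext_ack *, and failures attach a human-readable string via NL_SET_ERR_MSG() ("Can not replace route - no match found" above) that the netlink core echoes back to the requesting socket alongside the errno. A sketch of the handler-side idiom:

#include <linux/types.h>
#include <linux/netlink.h>      /* NL_SET_ERR_MSG(), struct netlink_ext_ack */
#include <linux/errno.h>

static int validate_plen(u8 plen, struct netlink_ext_ack *extack)
{
        if (plen > 128) {
                NL_SET_ERR_MSG(extack, "Invalid prefix length");
                return -EINVAL;
        }
        return 0;
}
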
*net) */ int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc) + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -971,8 +974,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); - if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) && - !atomic_read(&rt->dst.__refcnt))) + if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) return -EINVAL; if (info->nlh) { @@ -986,7 +988,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, - replace_required, sernum); + replace_required, sernum, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; @@ -1027,7 +1029,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sn = fib6_add_1(sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum); + allow_create, replace_required, sernum, + extack); if (IS_ERR(sn)) { /* If it is failed, discard just allocated @@ -1046,7 +1049,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum); + allow_create, replace_required, sernum, + extack); if (IS_ERR(sn)) { err = PTR_ERR(sn); @@ -1067,7 +1071,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) fib6_prune_clones(info->nl_net, pn); - rt->dst.flags &= ~DST_NOCACHE; } out: @@ -1092,8 +1095,10 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - if (!(rt->dst.flags & DST_NOCACHE)) - dst_free(&rt->dst); + /* Always release dst as dst->__refcnt is guaranteed + * to be taken before entering this function + */ + dst_release_immediate(&rt->dst); } return err; @@ -1104,8 +1109,10 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - if (!(rt->dst.flags & DST_NOCACHE)) - dst_free(&rt->dst); + /* Always release dst as dst->__refcnt is guaranteed + * to be taken before entering this function + */ + dst_release_immediate(&rt->dst); return err; #endif } @@ -1774,7 +1781,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args->more++; } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->dst.__refcnt) == 0 && + if (atomic_read(&rt->dst.__refcnt) == 1 && time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; @@ -1812,8 +1819,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) } gc_args.timeout = expires ? 
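
The fib6_add() hunks above encode the reworked dst reference contract from the DST-GC removal: the caller is now guaranteed to hold a reference on rt->dst (the WARN_ON_ONCE enforces it), DST_NOCACHE is gone, and failure paths drop the reference with dst_release_immediate() rather than the removed dst_free(). A sketch of the caller-side shape, with a hypothetical insert helper standing in for the tree code:

#include <net/dst.h>
#include <net/ip6_fib.h>

static int insert_into_fib(struct rt6_info *rt);        /* hypothetical */

/* ip6_dst_alloc() now starts the dst with __refcnt == 1; insertion
 * consumes that reference, so a failed insertion must release it
 * explicitly.
 */
static int insert_or_release(struct rt6_info *rt)
{
        int err = insert_into_fib(rt);

        if (err)
                dst_release_immediate(&rt->dst);
        return err;
}
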
(int)expires : net->ipv6.sysctl.ip6_rt_gc_interval; - - gc_args.more = icmp6_dst_gc(); + gc_args.more = 0; fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 64eea3962733..67ff2aaf5dcb 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -942,7 +942,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, const void *daddr, const void *saddr, unsigned int len) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); + struct ipv6hdr *ipv6h = skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); ip6_flow_hdr(ipv6h, 0, @@ -1170,7 +1170,8 @@ static struct pernet_operations ip6gre_net_ops = { .size = sizeof(struct ip6gre_net), }; -static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { __be16 flags; @@ -1188,7 +1189,8 @@ static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct in6_addr daddr; @@ -1209,7 +1211,7 @@ static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) } out: - return ip6gre_tunnel_validate(tb, data); + return ip6gre_tunnel_validate(tb, data, extack); } @@ -1342,7 +1344,8 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[], } static int ip6gre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *nt; struct net *net = dev_net(dev); @@ -1403,7 +1406,8 @@ out: } static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *t, *nt = netdev_priv(dev); struct net *net = nt->net; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1699acb2fa2c..1422d6c08377 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -67,9 +67,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * struct in6_addr *nexthop; int ret; - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -154,6 +151,9 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + if (unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -682,7 +682,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, skb_frag_list_init(skb); __skb_pull(skb, hlen); - fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); + fh = __skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); @@ -698,15 +698,13 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->dst); - for (;;) { /* Prepare header of the next frame, * before previous one went down. 
*/ if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); - fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); + fh = __skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), tmp_hdr, @@ -742,7 +740,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - ip6_rt_put(rt); return 0; } @@ -750,7 +747,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - ip6_rt_put(rt); return err; slow_path_clean: @@ -869,7 +865,6 @@ fail_toobig: if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); - skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; @@ -1477,7 +1472,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, @@ -1586,7 +1581,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8c6c3c8e7eef..3a0ba2ae4b0f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1885,7 +1885,8 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } -static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { u8 proto; @@ -1974,7 +1975,8 @@ static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], } static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -2005,7 +2007,8 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, } static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 837ea1eefe7f..486c2305f53c 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -907,7 +907,8 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) return 0; } -static int vti6_validate(struct nlattr *tb[], struct nlattr *data[]) +static int vti6_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { return 0; } @@ -940,7 +941,8 @@ static void vti6_netlink_parms(struct nlattr *data[], } static int vti6_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct ip6_tnl *nt; @@ -966,7 +968,8 @@ static void vti6_dellink(struct net_device *dev, struct list_head *head) } static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *t; struct 
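
The mechanical signature changes to ip6gre/ip6_tnl/ip6_vti (and sit further down) follow from the rtnl_link_ops callbacks gaining a trailing struct netlink_ext_ack * in the core. An implementation with nothing to report can ignore it, but the prototype must match. A sketch of the new shape (the "dummy6" kind string is made up for illustration):

#include <net/rtnetlink.h>

static int dummy_validate(struct nlattr *tb[], struct nlattr *data[],
                          struct netlink_ext_ack *extack)
{
        return 0;       /* nothing to validate, extack unused */
}

static struct rtnl_link_ops dummy_link_ops = {
        .kind     = "dummy6",           /* hypothetical */
        .validate = dummy_validate,
};
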
__ip6_tnl_parm p; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2ecb39b943b5..7454850f2098 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -116,6 +116,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); +static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr6_table *mrt, bool all); @@ -846,7 +847,8 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); @@ -1106,7 +1108,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct ipv6hdr)); if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; @@ -1123,8 +1126,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, } /* - * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd - * expects the following bizarre scheme. + * Bounce a cache query up to pim6sd and netlink. * * Called under mrt_lock. 
*/ @@ -1206,6 +1208,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, return -EINVAL; } + mrt6msg_netlink_event(mrt, skb); + /* * Deliver to user space multicast routing algorithms */ @@ -2455,6 +2459,71 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); } +static size_t mrt6msg_netlink_msgsize(size_t payloadlen) +{ + size_t len = + NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ + + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ + /* IP6MRA_CREPORT_SRC_ADDR */ + + nla_total_size(sizeof(struct in6_addr)) + /* IP6MRA_CREPORT_DST_ADDR */ + + nla_total_size(sizeof(struct in6_addr)) + /* IP6MRA_CREPORT_PKT */ + + nla_total_size(payloadlen) + ; + + return len; +} + +static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt) +{ + struct net *net = read_pnet(&mrt->net); + struct nlmsghdr *nlh; + struct rtgenmsg *rtgenm; + struct mrt6msg *msg; + struct sk_buff *skb; + struct nlattr *nla; + int payloadlen; + + payloadlen = pkt->len - sizeof(struct mrt6msg); + msg = (struct mrt6msg *)skb_transport_header(pkt); + + skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); + if (!skb) + goto errout; + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, + sizeof(struct rtgenmsg), 0); + if (!nlh) + goto errout; + rtgenm = nlmsg_data(nlh); + rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; + if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || + nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || + nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, + &msg->im6_src) || + nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, + &msg->im6_dst)) + goto nla_put_failure; + + nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); + if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), + nla_data(nla), payloadlen)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); + return; + +nla_put_failure: + nlmsg_cancel(skb, nlh); +errout: + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); +} + static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a531ba032b85..85404e7c3114 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -505,7 +505,7 @@ sticky_done: break; memset(opt, 0, sizeof(*opt)); - atomic_set(&opt->refcnt, 1); + refcount_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 07403fa164e1..12b7c27ce5ce 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -701,7 +701,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) spin_lock_bh(&mc->mca_lock); if (del_timer(&mc->mca_timer)) - atomic_dec(&mc->mca_refcnt); + refcount_dec(&mc->mca_refcnt); spin_unlock_bh(&mc->mca_lock); } @@ -819,12 +819,12 @@ static void mld_clear_delrec(struct inet6_dev *idev) static void mca_get(struct ifmcaddr6 *mc) { - atomic_inc(&mc->mca_refcnt); + refcount_inc(&mc->mca_refcnt); } static void ma_put(struct ifmcaddr6 *mc) { - if (atomic_dec_and_test(&mc->mca_refcnt)) { + if (refcount_dec_and_test(&mc->mca_refcnt)) { in6_dev_put(mc->idev); kfree(mc); } @@ -846,7 +846,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; - atomic_set(&mc->mca_refcnt, 1); + 
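
On the mrt6msg_netlink_event() path added in ip6mr.c above: cache reports are still copied to the pim6sd mroute socket, but they are now also multicast as RTM_NEWCACHEREPORT to the new RTNLGRP_IPV6_MROUTE_R group, so a monitor can observe them without owning the mroute socket. A userspace sketch of the subscription (error handling trimmed; SOL_NETLINK fallback in case the libc headers predate it):

#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

static int open_mroute_monitor(void)
{
        int grp = RTNLGRP_IPV6_MROUTE_R;
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                   &grp, sizeof(grp));
        return fd;      /* recvmsg() now yields RTM_NEWCACHEREPORT events */
}
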
refcount_set(&mc->mca_refcnt, 1); spin_lock_init(&mc->mca_lock); /* initial mode is (EX, empty) */ @@ -1065,7 +1065,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) return; if (del_timer(&ma->mca_timer)) { - atomic_dec(&ma->mca_refcnt); + refcount_dec(&ma->mca_refcnt); delay = ma->mca_timer.expires - jiffies; } @@ -1074,7 +1074,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) ma->mca_timer.expires = jiffies + delay; if (!mod_timer(&ma->mca_timer, jiffies + delay)) - atomic_inc(&ma->mca_refcnt); + refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING; } @@ -1469,7 +1469,7 @@ int igmp6_event_report(struct sk_buff *skb) if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) - atomic_dec(&ma->mca_refcnt); + refcount_dec(&ma->mca_refcnt); ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING); spin_unlock(&ma->mca_lock); break; @@ -1602,7 +1602,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); - memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); + skb_put_data(skb, ra, sizeof(ra)); skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); @@ -1692,7 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; - pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); + pgr = skb_put(skb, sizeof(struct mld2_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; pgr->grec_nsrcs = 0; @@ -1784,7 +1784,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } if (!skb) return NULL; - psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc)); + psrc = skb_put(skb, sizeof(*psrc)); *psrc = psf->sf_addr; scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || @@ -2006,10 +2006,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); - memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); + skb_put_data(skb, ra, sizeof(ra)); - hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); - memset(hdr, 0, sizeof(struct mld_msg)); + hdr = skb_put_zero(skb, sizeof(struct mld_msg)); hdr->mld_type = type; hdr->mld_mca = *addr; @@ -2392,12 +2391,12 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) spin_lock_bh(&ma->mca_lock); if (del_timer(&ma->mca_timer)) { - atomic_dec(&ma->mca_refcnt); + refcount_dec(&ma->mca_refcnt); delay = ma->mca_timer.expires - jiffies; } if (!mod_timer(&ma->mca_timer, jiffies + delay)) - atomic_inc(&ma->mca_refcnt); + refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER; spin_unlock_bh(&ma->mca_lock); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d310dc41209a..0327c1f2e6fc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -528,7 +528,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, if (!skb) return; - msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, @@ -597,7 +597,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (!skb) return; - msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = 
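
The mld_newpack()/igmp6_send() hunks above show the new skb_put helpers: memcpy(skb_put(...), ...) collapses into skb_put_data(), and memset(skb_put(...), 0, ...) into skb_put_zero(). A sketch of both, with 'ra' standing in for the router-alert bytes copied in the MLD code:

#include <linux/skbuff.h>
#include <linux/types.h>

static void fill_tail(struct sk_buff *skb, const u8 *ra, unsigned int ra_len,
                      unsigned int zero_len)
{
        skb_put_data(skb, ra, ra_len);  /* memcpy(skb_put(skb, n), p, n) */
        skb_put_zero(skb, zero_len);    /* memset(skb_put(skb, n), 0, n) */
}
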
NDISC_NEIGHBOUR_SOLICITATION, @@ -657,7 +657,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, if (!skb) return; - msg = (struct rs_msg *)skb_put(skb, sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); *msg = (struct rs_msg) { .icmph = { .icmp6_type = NDISC_ROUTER_SOLICITATION, @@ -1633,7 +1633,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (!buff) goto release; - msg = (struct rd_msg *)skb_put(buff, sizeof(*msg)); + msg = skb_put(buff, sizeof(*msg)); *msg = (struct rd_msg) { .icmph = { .icmp6_type = NDISC_REDIRECT, diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index d3c4daa708b9..ce203dd729e0 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -27,7 +27,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, struct ipv6hdr *iph; skb_reset_network_header(skb); - iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph)); + iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); iph->hop_limit = net->ipv6.devconf_all->hop_limit; iph->nexthdr = IPPROTO_TCP; @@ -105,7 +105,7 @@ synproxy_send_client_synack(struct net *net, niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss)); @@ -147,7 +147,7 @@ synproxy_send_server_syn(struct net *net, niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(recv_seq - 1); @@ -192,7 +192,7 @@ synproxy_send_server_ack(struct net *net, niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(ntohl(th->ack_seq)); @@ -230,7 +230,7 @@ synproxy_send_client_ack(struct net *net, niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); - nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(ntohl(th->seq) + 1); diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 2297c9f073ba..d7b679037bae 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -75,8 +75,8 @@ static int masq_device_event(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_ct_iterate_cleanup_net(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); return NOTIFY_DONE; } @@ -99,7 +99,7 @@ static void iterate_cleanup_work(struct work_struct *work) w = container_of(work, struct masq_dev_work, work); index = w->ifindex; - nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); put_net(w->net); kfree(w); @@ -110,12 +110,12 @@ static void iterate_cleanup_work(struct work_struct *work) /* ipv6 inet notifier is an atomic notifier, i.e. we cannot * schedule. 
* - * Unfortunately, nf_ct_iterate_cleanup can run for a long + * Unfortunately, nf_ct_iterate_cleanup_net can run for a long * time if there are lots of conntracks and the system * handles high softirq load, so it frequently calls cond_resched * while iterating the conntrack table. * - * So we defer nf_ct_iterate_cleanup walk to the system workqueue. + * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. * * As we can have 'a lot' of inet_events (depending on amount * of ipv6 addresses being deleted), we also need to add an upper diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index eedee5d108d9..24858402e374 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -95,7 +95,7 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, int needs_ack; skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + tcph = skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; tcph->source = oth->dest; @@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) __be16 fo; u8 proto; - if (skb->csum_bad) - return false; - if (skb_csum_unnecessary(skb)) return true; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 322bd62e688b..0488a24c2a44 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -128,7 +128,6 @@ static void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); - rt->dst.flags |= DST_NOCACHE; rt->rt6i_uncached_list = ul; spin_lock_bh(&ul->lock); @@ -354,7 +353,7 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_FORCE_CHK, flags); + 1, DST_OBSOLETE_FORCE_CHK, flags); if (rt) rt6_info_init(rt); @@ -381,7 +380,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, *p = NULL; } } else { - dst_destroy((struct dst_entry *)rt); + dst_release_immediate(&rt->dst); return NULL; } } @@ -932,20 +931,21 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, EXPORT_SYMBOL(rt6_lookup); /* ip6_ins_rt is called with FREE table->tb6_lock. - It takes new route entry, the addition fails by any reason the - route is freed. In any case, if caller does not hold it, it may - be destroyed. + * It takes new route entry, the addition fails by any reason the + * route is released. + * Caller must hold dst before calling it. 
*/ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc) + struct mx6_config *mxc, + struct netlink_ext_ack *extack) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc); + err = fib6_add(&table->tb6_root, rt, info, mxc, extack); write_unlock_bh(&table->tb6_lock); return err; @@ -956,7 +956,9 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; struct mx6_config mxc = { .mx = NULL, }; - return __ip6_ins_rt(rt, &info, &mxc); + /* Hold dst to account for the reference from the fib6 tree */ + dst_hold(&rt->dst); + return __ip6_ins_rt(rt, &info, &mxc, NULL); } static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, @@ -1048,7 +1050,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) prev = cmpxchg(p, NULL, pcpu_rt); if (prev) { /* If someone did it before us, return prev instead */ - dst_destroy(&pcpu_rt->dst); + dst_release_immediate(&pcpu_rt->dst); pcpu_rt = prev; } } else { @@ -1058,7 +1060,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) * since rt is going away anyway. The next * dst_check() will trigger a re-lookup. */ - dst_destroy(&pcpu_rt->dst); + dst_release_immediate(&pcpu_rt->dst); pcpu_rt = rt; } dst_hold(&pcpu_rt->dst); @@ -1128,12 +1130,15 @@ redo_rt6_select: uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); dst_release(&rt->dst); - if (uncached_rt) + if (uncached_rt) { + /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() + * No need for another dst_hold() + */ rt6_uncached_list_add(uncached_rt); - else + } else { uncached_rt = net->ipv6.ip6_null_entry; - - dst_hold(&uncached_rt->dst); + dst_hold(&uncached_rt->dst); + } trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; @@ -1244,9 +1249,11 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; + struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + DST_OBSOLETE_NONE, 0); if (rt) { rt6_info_init(rt); @@ -1256,10 +1263,8 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->output = dst_discard_out; dst_copy_metrics(new, &ort->dst); - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); + rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; rt->rt6i_metric = 0; @@ -1268,8 +1273,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif - - dst_free(new); } dst_release(dst_orig); @@ -1322,7 +1325,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt6_dst_from_metrics_check(rt); if (rt->rt6i_flags & RTF_PCPU || - (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from)) + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) return rt6_dst_from_check(rt, cookie); else return rt6_check(rt, cookie); @@ -1355,8 +1358,8 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags & RTF_CACHE) { - dst_hold(&rt->dst); - 
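
With the dst garbage collector gone, a cached route may drop its last reference on another CPU at any time, so the deletion path just below switches to dst_hold_safe(): it takes a reference only if __refcnt is still nonzero, never resurrecting a dying dst. A sketch of the idiom as used in ip6_link_failure():

#include <net/dst.h>
#include <net/ip6_route.h>

static void del_cached_route(struct rt6_info *rt)
{
        if (rt->rt6i_flags & RTF_CACHE) {
                if (dst_hold_safe(&rt->dst))
                        ip6_del_rt(rt); /* consumes the reference we won */
        }
}
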
ip6_del_rt(rt); + if (dst_hold_safe(&rt->dst)) + ip6_del_rt(rt); } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; } @@ -1420,6 +1423,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, * invalidate the sk->sk_dst_cache. */ ip6_ins_rt(nrt6); + /* Release the reference taken in + * ip6_rt_cache_alloc() + */ + dst_release(&nrt6->dst); } } } @@ -1648,9 +1655,6 @@ out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static struct dst_entry *icmp6_dst_gc_list; -static DEFINE_SPINLOCK(icmp6_dst_lock); - struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6) { @@ -1671,19 +1675,16 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); - spin_lock_bh(&icmp6_dst_lock); - rt->dst.next = icmp6_dst_gc_list; - icmp6_dst_gc_list = &rt->dst; - spin_unlock_bh(&icmp6_dst_lock); - - fib6_force_start_gc(net); + /* Add this dst into uncached_list so that rt6_ifdown() can + * do proper release of the net_device + */ + rt6_uncached_list_add(rt); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); @@ -1691,48 +1692,6 @@ out: return dst; } -int icmp6_dst_gc(void) -{ - struct dst_entry *dst, **pprev; - int more = 0; - - spin_lock_bh(&icmp6_dst_lock); - pprev = &icmp6_dst_gc_list; - - while ((dst = *pprev) != NULL) { - if (!atomic_read(&dst->__refcnt)) { - *pprev = dst->next; - dst_free(dst); - } else { - pprev = &dst->next; - ++more; - } - } - - spin_unlock_bh(&icmp6_dst_lock); - - return more; -} - -static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), - void *arg) -{ - struct dst_entry *dst, **pprev; - - spin_lock_bh(&icmp6_dst_lock); - pprev = &icmp6_dst_gc_list; - while ((dst = *pprev) != NULL) { - struct rt6_info *rt = (struct rt6_info *) dst; - if (func(rt, arg)) { - *pprev = dst->next; - dst_free(dst); - } else { - pprev = &dst->next; - } - } - spin_unlock_bh(&icmp6_dst_lock); -} - static int ip6_dst_gc(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); @@ -1844,7 +1803,8 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, return rt; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) +static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; @@ -1855,14 +1815,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) int err = -EINVAL; /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); goto out; + } - if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto out; + } + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); goto out; + } #ifndef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len) + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); goto out; + } #endif if (cfg->fc_ifindex) { err = -ENODEV; @@ -1926,7 +1897,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) err = 
lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET6, cfg, - &lwtstate); + &lwtstate, extack); if (err) goto out; rt->dst.lwtstate = lwtstate_get(lwtstate); @@ -2013,9 +1984,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) err = -EINVAL; if (ipv6_chk_addr_and_flags(net, gw_addr, gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) + dev : NULL, 0, 0)) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); goto out; - + } rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -2031,8 +2003,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) addressing */ if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) + IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, + "Invalid gateway address"); goto out; + } if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); @@ -2072,8 +2047,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) goto out; } err = -EINVAL; - if (!dev || (dev->flags & IFF_LOOPBACK)) + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); + goto out; + } } err = -ENODEV; @@ -2082,6 +2063,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!ipv6_addr_any(&cfg->fc_prefsrc)) { if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { + NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; goto out; } @@ -2106,18 +2088,19 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg) +int ip6_route_add(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct mx6_config mxc = { .mx = NULL, }; struct rt6_info *rt; int err; - rt = ip6_route_info_create(cfg); + rt = ip6_route_info_create(cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -2128,14 +2111,14 @@ int ip6_route_add(struct fib6_config *cfg) if (err) goto out; - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); kfree(mxc.mx); return err; out: if (rt) - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); return err; } @@ -2146,8 +2129,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry || - rt->dst.flags & DST_NOCACHE) { + if (rt == net->ipv6.ip6_null_entry) { err = -ENOENT; goto out; } @@ -2222,7 +2204,8 @@ out_put: return err; } -static int ip6_route_del(struct fib6_config *cfg) +static int ip6_route_del(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct fib6_table *table; struct fib6_node *fn; @@ -2230,8 +2213,10 @@ static int ip6_route_del(struct fib6_config *cfg) int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); - if (!table) + if (!table) { + NL_SET_ERR_MSG(extack, "FIB table does not exist"); return err; + } read_lock_bh(&table->tb6_lock); @@ -2369,7 +2354,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; if (ip6_ins_rt(nrt)) - goto out; + goto out_release; netevent.old = &rt->dst; netevent.new = &nrt->dst; @@ -2382,6 +2367,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu ip6_del_rt(rt); } +out_release: + /* Release the 
reference taken in + * ip6_rt_cache_alloc() + */ + dst_release(&nrt->dst); + out: neigh_release(neigh); } @@ -2483,7 +2474,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg); + ip6_route_add(&cfg, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } @@ -2529,7 +2520,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg)) { + if (!ip6_route_add(&cfg, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -2622,10 +2613,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg); + err = ip6_route_add(&cfg, NULL); break; case SIOCDELRT: - err = ip6_route_del(&cfg); + err = ip6_route_del(&cfg, NULL); break; default: err = -EINVAL; @@ -2729,9 +2720,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.plen = 128; tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; rt->rt6i_table = fib6_get_table(net, tb_id); - rt->dst.flags |= DST_NOCACHE; - - atomic_set(&rt->dst.__refcnt, 1); return rt; } @@ -2819,7 +2807,6 @@ void rt6_ifdown(struct net *net, struct net_device *dev) }; fib6_clean_all(net, fib6_ifdown, &adn); - icmp6_clean_all(fib6_ifdown, &adn); if (dev) rt6_uncached_list_flush_dev(net, dev); } @@ -2904,7 +2891,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, - struct fib6_config *cfg) + struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; @@ -2988,7 +2976,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, - cfg->fc_mp_len); + cfg->fc_mp_len, extack); if (err < 0) goto errout; } @@ -3007,7 +2995,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); if (err < 0) goto errout; } @@ -3098,7 +3086,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } -static int ip6_route_multipath_add(struct fib6_config *cfg) +static int ip6_route_multipath_add(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct rt6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; @@ -3146,7 +3135,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) r_cfg.fc_encap_type = nla_get_u16(nla); } - rt = ip6_route_info_create(&r_cfg); + rt = ip6_route_info_create(&r_cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -3155,7 +3144,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { - dst_free(&rt->dst); + dst_release_immediate(&rt->dst); goto cleanup; } @@ -3171,7 +3160,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc); + err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); /* save reference to first route for notification */ if (!rt_notif && !err) rt_notif = 
nh->rt6_info; @@ -3213,13 +3202,13 @@ add_errout: list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) break; - ip6_route_del(&nh->r_cfg); + ip6_route_del(&nh->r_cfg, extack); } cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { if (nh->rt6_info) - dst_free(&nh->rt6_info->dst); + dst_release_immediate(&nh->rt6_info->dst); kfree(nh->mxc.mx); list_del(&nh->next); kfree(nh); @@ -3228,7 +3217,8 @@ cleanup: return err; } -static int ip6_route_multipath_del(struct fib6_config *cfg) +static int ip6_route_multipath_del(struct fib6_config *cfg, + struct netlink_ext_ack *extack) { struct fib6_config r_cfg; struct rtnexthop *rtnh; @@ -3255,7 +3245,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg) r_cfg.fc_flags |= RTF_GATEWAY; } } - err = ip6_route_del(&r_cfg); + err = ip6_route_del(&r_cfg, extack); if (err) last_err = err; @@ -3271,15 +3261,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config cfg; int err; - err = rtm_to_fib6_config(skb, nlh, &cfg); + err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_mp) - return ip6_route_multipath_del(&cfg); + return ip6_route_multipath_del(&cfg, extack); else { cfg.fc_delete_all_nh = 1; - return ip6_route_del(&cfg); + return ip6_route_del(&cfg, extack); } } @@ -3289,14 +3279,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config cfg; int err; - err = rtm_to_fib6_config(skb, nlh, &cfg); + err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_mp) - return ip6_route_multipath_add(&cfg); + return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg); + return ip6_route_add(&cfg, extack); } static size_t rt6_nlmsg_size(struct rt6_info *rt) @@ -3577,11 +3567,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; + int err, iif = 0, oif = 0; + struct dst_entry *dst; struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6; - int err, iif = 0, oif = 0; + bool fibmatch; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); @@ -3592,6 +3584,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, memset(&fl6, 0, sizeof(fl6)); rtm = nlmsg_data(nlh); fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); + fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) @@ -3637,12 +3630,23 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, - flags); + if (!fibmatch) + dst = ip6_route_input_lookup(net, dev, &fl6, flags); } else { fl6.flowi6_oif = oif; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); + if (!fibmatch) + dst = ip6_route_output(net, NULL, &fl6); + } + + if (fibmatch) + dst = ip6_route_lookup(net, &fl6, 0); + + rt = container_of(dst, struct rt6_info, dst); + if (rt->dst.error) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; } if (rt == net->ipv6.ip6_null_entry) { @@ -3659,10 +3663,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } skb_dst_set(skb, &rt->dst); - - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + if (fibmatch) + err = 
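
The inet6_rtm_getroute() changes above implement RTM_F_FIB_MATCH: when the flag is set in the request, the kernel reports the FIB entry that matched via ip6_route_lookup() instead of the possibly cloned/cached result of a full input/output lookup. A userspace sketch of the request header (send/recv plumbing omitted):

#include <string.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>

struct getroute_req {
        struct nlmsghdr nlh;
        struct rtmsg rtm;
        /* RTA_DST etc. would follow */
};

static void init_fibmatch_req(struct getroute_req *req)
{
        memset(req, 0, sizeof(*req));
        req->nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtmsg));
        req->nlh.nlmsg_type  = RTM_GETROUTE;
        req->nlh.nlmsg_flags = NLM_F_REQUEST;
        req->rtm.rtm_family  = AF_INET6;
        req->rtm.rtm_flags   = RTM_F_FIB_MATCH;         /* new flag */
}
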
rt6_fill_node(net, skb, rt, NULL, NULL, iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); if (err < 0) { kfree_skb(skb); goto errout; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5f44ffed2576..15fba55e3da8 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -303,13 +303,9 @@ static int seg6_genl_dumphmac_done(struct netlink_callback *cb) static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; - struct net *net = sock_net(skb->sk); - struct seg6_pernet_data *sdata; struct seg6_hmac_info *hinfo; int ret; - sdata = seg6_pernet(net); - ret = rhashtable_walk_start(iter); if (ret && ret != -EAGAIN) goto done; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 6a495490d43e..264d772d3c7d 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -326,7 +326,8 @@ drop: static int seg6_build_state(struct nlattr *nla, unsigned int family, const void *cfg, - struct lwtunnel_state **ts) + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) { struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; struct seg6_iptunnel_encap *tuninfo; @@ -336,7 +337,7 @@ static int seg6_build_state(struct nlattr *nla, int err; err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy, NULL); + seg6_iptunnel_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f8ad15891cd7..ac912bb21747 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1406,7 +1406,8 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); } -static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { u8 proto; @@ -1537,7 +1538,8 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], #endif static int ipip6_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct ip_tunnel *nt; @@ -1573,7 +1575,8 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5abc3692b901..7b75b0620730 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -162,15 +162,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + tsoff = secure_tcpv6_ts_off(sock_net(sk), + ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(&tcp_opt)) + if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) goto out; ret = NULL; @@ -193,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if 
(ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -211,7 +212,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack.v64 = 0; + treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f4310a36a04..2521690d62d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -109,9 +109,9 @@ static u32 tcp_v6_init_seq(const struct sk_buff *skb) tcp_hdr(skb)->source); } -static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); } @@ -292,7 +292,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); - tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), + np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } @@ -514,11 +515,12 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } -static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, - int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, + char __user *optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + u8 prefixlen; if (optlen < sizeof(cmd)) return -EINVAL; @@ -529,12 +531,22 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, if (sin6->sin6_family != AF_INET6) return -EINVAL; + if (optname == TCP_MD5SIG_EXT && + cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { + prefixlen = cmd.tcpm_prefixlen; + if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && + prefixlen > 32)) + return -EINVAL; + } else { + prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 
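
The tcp_v6_parse_md5_keys() rework here (the prefixlen default continues just below) adds prefix-based MD5 keys: the new TCP_MD5SIG_EXT sockopt carries tcpm_flags/tcpm_prefixlen in struct tcp_md5sig, validated to at most 128 (32 for v4-mapped peers). A userspace sketch installing one key for a whole /64; the key bytes and prefix are made up for illustration:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static int add_prefix_key(int fd, const struct sockaddr_in6 *peer)
{
        struct tcp_md5sig md5;

        memset(&md5, 0, sizeof(md5));
        memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
        md5.tcpm_flags     = TCP_MD5SIG_FLAG_PREFIX;
        md5.tcpm_prefixlen = 64;
        md5.tcpm_keylen    = 12;
        memcpy(md5.tcpm_key, "example-key!", 12);

        return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG_EXT,
                          &md5, sizeof(md5));
}
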
32 : 128; + } + if (!cmd.tcpm_keylen) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET); + AF_INET, prefixlen); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6); + AF_INET6, prefixlen); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) @@ -542,10 +554,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + AF_INET, prefixlen, cmd.tcpm_key, + cmd.tcpm_keylen, GFP_KERNEL); return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + AF_INET6, prefixlen, cmd.tcpm_key, + cmd.tcpm_keylen, GFP_KERNEL); } static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, @@ -720,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req, np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } } @@ -788,7 +802,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); - t1 = (struct tcphdr *) skb_push(buff, tot_len); + t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); /* Swap the send and the receive. */ @@ -949,7 +963,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp + tcptw->tw_ts_offset, + tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); @@ -971,7 +985,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp + tcp_rsk(req)->ts_off, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); @@ -1185,7 +1199,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, - AF_INET6, key->key, key->keylen, + AF_INET6, 128, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1248,9 +1262,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (tcp_filter(sk, skb)) - goto discard; - /* * socket locking is here for SMP purposes as backlog rcv * is currently called with bh processing disabled. 
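The tcp_v6_parse_md5_keys() changes above are the IPv6 half of the new prefix-based TCP MD5 keys: with optname TCP_MD5SIG_EXT and TCP_MD5SIG_FLAG_PREFIX set in tcpm_flags, one key covers a whole subnet, and the kernel rejects a tcpm_prefixlen above 128 (above 32 for a v4-mapped address). A zero tcpm_keylen still deletes the matching key, now matched by prefix as well. A minimal userspace sketch, not part of this diff and assuming a uapi <linux/tcp.h> new enough to carry TCP_MD5SIG_EXT and tcpm_prefixlen (both added by this series):

    #include <string.h>
    #include <arpa/inet.h>
    #include <sys/socket.h>
    #include <linux/tcp.h>	/* struct tcp_md5sig, TCP_MD5SIG_EXT */

    /* Install one MD5 key matching every peer inside prefix/plen.
     * Returns 0 on success, -1 on error (errno set by setsockopt()).
     */
    static int tcp_md5_key_for_prefix(int sk, const char *prefix, __u8 plen,
    				  const void *key, __u16 keylen)
    {
    	struct tcp_md5sig md5;
    	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&md5.tcpm_addr;

    	if (keylen > TCP_MD5SIG_MAXKEYLEN)
    		return -1;

    	memset(&md5, 0, sizeof(md5));
    	sin6->sin6_family = AF_INET6;
    	if (inet_pton(AF_INET6, prefix, &sin6->sin6_addr) != 1)
    		return -1;

    	md5.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX;
    	md5.tcpm_prefixlen = plen;	/* kernel rejects > 128, or > 32 if v4-mapped */
    	md5.tcpm_keylen = keylen;
    	memcpy(md5.tcpm_key, key, keylen);

    	return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT, &md5, sizeof(md5));
    }

For example, tcp_md5_key_for_prefix(sk, "2001:db8::", 64, "secret", 6) would protect every connection to 2001:db8::/64 on that socket.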
@@ -1453,6 +1464,8 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); + } else if (tcp_filter(sk, skb)) { + goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; @@ -1796,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -1829,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } static int tcp6_seq_show(struct seq_file *seq, void *v) @@ -1909,6 +1922,7 @@ struct proto tcpv6_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 75703fda23e7..4a3e65626e8b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -362,7 +362,7 @@ try_again: if (!skb) return err; - ulen = skb->len; + ulen = udp_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; @@ -379,14 +379,18 @@ try_again: if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { - checksum_valid = !udp_lib_checksum_complete(skb); + checksum_valid = udp_skb_csum_unnecessary(skb) || + !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } - if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, off, msg, copied); - else { + if (checksum_valid || udp_skb_csum_unnecessary(skb)) { + if (udp_skb_is_linear(skb)) + err = copy_linear_skb(skb, copied, off, &msg->msg_iter); + else + err = skb_copy_datagram_msg(skb, off, msg, copied); + } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; @@ -455,7 +459,8 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { + if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, + udp_skb_destructor)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -629,6 +634,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } + prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; @@ -910,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) else return; - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; @@ -920,12 +926,11 @@ static void udp_v6_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst) { - if (dst->flags & DST_NOCACHE) { - if 
(likely(atomic_inc_not_zero(&dst->__refcnt))) - skb_dst_set(skb, dst); - } else { - skb_dst_set_noref(skb, dst); - } + /* set noref for now. + * any place which wants to hold dst has to call + * dst_hold_safe() + */ + skb_dst_set_noref(skb, dst); } } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 1e205c3253ac..57fd314ec2b8 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -54,7 +54,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); - ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len); + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index d7b731a78d09..4e438bc7ee87 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -59,7 +59,7 @@ struct xfrm6_tunnel_spi { struct hlist_node list_byspi; xfrm_address_t addr; u32 spi; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; }; @@ -160,7 +160,7 @@ alloc_spi: memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; - atomic_set(&x6spi->refcnt, 1); + refcount_set(&x6spi->refcnt, 1); hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]); @@ -178,7 +178,7 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) spin_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); if (x6spi) { - atomic_inc(&x6spi->refcnt); + refcount_inc(&x6spi->refcnt); spi = x6spi->spi; } else spi = __xfrm6_tunnel_alloc_spi(net, saddr); @@ -207,7 +207,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) list_byaddr) { if (xfrm6_addr_equal(&x6spi->addr, saddr)) { - if (atomic_dec_and_test(&x6spi->refcnt)) { + if (refcount_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index fa31ef29e3fa..ac598ec90589 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -308,7 +308,7 @@ void ipxitf_down(struct ipx_interface *intrfc) static void __ipxitf_put(struct ipx_interface *intrfc) { - if (atomic_dec_and_test(&intrfc->refcnt)) + if (refcount_dec_and_test(&intrfc->refcnt)) __ipxitf_down(intrfc); } @@ -876,7 +876,7 @@ static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum, intrfc->if_ipx_offset = ipx_offset; intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; INIT_HLIST_HEAD(&intrfc->if_sklist); - atomic_set(&intrfc->refcnt, 1); + refcount_set(&intrfc->refcnt, 1); spin_lock_init(&intrfc->if_sklist_lock); } @@ -1105,7 +1105,7 @@ static struct ipx_interface *ipxitf_auto_create(struct net_device *dev, memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), dev->dev_addr, dev->addr_len); spin_lock_init(&intrfc->if_sklist_lock); - atomic_set(&intrfc->refcnt, 1); + refcount_set(&intrfc->refcnt, 1); ipxitf_insert(intrfc); dev_hold(dev); } diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index c1d247ebe916..7d75e4c5c75d 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -53,7 +53,7 @@ static int ipx_seq_interface_show(struct seq_file *seq, void *v) seq_printf(seq, "%-11s", ipx_device_name(i)); seq_printf(seq, "%-9s", ipx_frame_name(i->if_dlink_type)); #ifdef IPX_REFCNT_DEBUG - seq_printf(seq, "%6d", atomic_read(&i->refcnt)); + seq_printf(seq, 
"%6d", refcount_read(&i->refcnt)); #endif seq_puts(seq, "\n"); out: diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index 3e2a32a9f3bd..b5d91447f3dc 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -59,7 +59,7 @@ int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, if (!rt) goto out; - atomic_set(&rt->refcnt, 1); + refcount_set(&rt->refcnt, 1); ipxrtr_hold(rt); write_lock_bh(&ipx_routes_lock); list_add(&rt->node, &ipx_routes); diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index f6061c4bb0a8..ec157c3419b5 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -690,7 +690,7 @@ static int ircomm_tty_write(struct tty_struct *tty, } /* Copy data */ - memcpy(skb_put(skb,size), buf + len, size); + skb_put_data(skb, buf + len, size); count -= size; len += size; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index b936b1251a66..debda3de4726 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -133,7 +133,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) if (!tx_skb) return; - frame = (struct snrm_frame *) skb_put(tx_skb, 2); + frame = skb_put(tx_skb, 2); /* Insert connection address field */ if (qos) @@ -228,7 +228,7 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) if (!tx_skb) return; - frame = (struct ua_frame *) skb_put(tx_skb, 10); + frame = skb_put(tx_skb, 10); /* Build UA response */ frame->caddr = self->caddr; @@ -268,7 +268,7 @@ void irlap_send_dm_frame( struct irlap_cb *self) if (!tx_skb) return; - frame = (struct dm_frame *)skb_put(tx_skb, 2); + frame = skb_put(tx_skb, 2); if (self->state == LAP_NDM) frame->caddr = CBROADCAST; @@ -298,7 +298,7 @@ void irlap_send_disc_frame(struct irlap_cb *self) if (!tx_skb) return; - frame = (struct disc_frame *)skb_put(tx_skb, 2); + frame = skb_put(tx_skb, 2); frame->caddr = self->caddr | CMD_FRAME; frame->control = DISC_CMD | PF_BIT; @@ -392,8 +392,7 @@ void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s, info[0] = discovery->data.charset; len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb)); - info = skb_put(tx_skb, len); - memcpy(info, discovery->data.info, len); + skb_put_data(tx_skb, discovery->data.info, len); } irlap_queue_xmit(self, tx_skb); } @@ -588,7 +587,7 @@ void irlap_send_rr_frame(struct irlap_cb *self, int command) if (!tx_skb) return; - frame = (struct rr_frame *)skb_put(tx_skb, 2); + frame = skb_put(tx_skb, 2); frame->caddr = self->caddr; frame->caddr |= (command) ? 
CMD_FRAME : 0; @@ -613,7 +612,7 @@ void irlap_send_rd_frame(struct irlap_cb *self) if (!tx_skb) return; - frame = (struct rd_frame *)skb_put(tx_skb, 2); + frame = skb_put(tx_skb, 2); frame->caddr = self->caddr; frame->control = RD_RSP | PF_BIT; @@ -1195,7 +1194,6 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, { struct sk_buff *tx_skb; struct test_frame *frame; - __u8 *info; tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC); if (!tx_skb) @@ -1203,21 +1201,19 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, /* Broadcast frames must include saddr and daddr fields */ if (caddr == CBROADCAST) { - frame = (struct test_frame *) - skb_put(tx_skb, sizeof(struct test_frame)); + frame = skb_put(tx_skb, sizeof(struct test_frame)); /* Insert the swapped addresses */ frame->saddr = cpu_to_le32(self->saddr); frame->daddr = cpu_to_le32(daddr); } else - frame = (struct test_frame *) skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER); + frame = skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER); frame->caddr = caddr; frame->control = TEST_RSP | PF_BIT; /* Copy info */ - info = skb_put(tx_skb, cmd->len); - memcpy(info, cmd->data, cmd->len); + skb_put_data(tx_skb, cmd->data, cmd->len); /* Return to sender */ irlap_wait_min_turn_around(self, &self->qos_tx); diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index 7f17a8020e8a..e390bceeb2f8 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -1065,7 +1065,7 @@ irnet_data_indication(void * instance, if(p[0] & 1) { /* protocol is compressed */ - skb_push(skb, 1)[0] = 0; + *(u8 *)skb_push(skb, 1) = 0; } else if(skb->len < 2) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 84de7b6326dc..148533169b1d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -322,8 +322,7 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, int err, confirm_recv = 0; memset(skb->head, 0, ETH_HLEN); - phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb, - sizeof(struct af_iucv_trans_hdr)); + phs_hdr = skb_push(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_push(skb, ETH_HLEN); @@ -363,7 +362,7 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, else skb_trim(skb, skb->dev->mtu); } - skb->protocol = ETH_P_AF_IUCV; + skb->protocol = cpu_to_be16(ETH_P_AF_IUCV); nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; @@ -403,7 +402,7 @@ static void iucv_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); } @@ -716,10 +715,8 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, char uid[9]; /* Verify the input sockaddr */ - if (!addr || addr->sa_family != AF_IUCV) - return -EINVAL; - - if (addr_len < sizeof(struct sockaddr_iucv)) + if (addr_len < sizeof(struct sockaddr_iucv) || + addr->sa_family != AF_IUCV) return -EINVAL; lock_sock(sk); @@ -863,7 +860,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, struct iucv_sock *iucv = iucv_sk(sk); int err; - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + if (alen < sizeof(struct sockaddr_iucv) || addr->sa_family != AF_IUCV) return -EINVAL; if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 
bf75c9231cca..c343ac60bf50 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, psock->sk->sk_receive_queue.qlen, atomic_read(&psock->sk->sk_rmem_alloc), psock->sk->sk_write_queue.qlen, - atomic_read(&psock->sk->sk_wmem_alloc)); + refcount_read(&psock->sk->sk_wmem_alloc)); if (psock->done) seq_puts(seq, "Done "); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index deca20fb2ce2..da49191f7ad0 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1985,7 +1985,7 @@ static int kcm_create(struct net *net, struct socket *sock, return 0; } -static struct net_proto_family kcm_family_ops = { +static const struct net_proto_family kcm_family_ops = { .family = PF_KCM, .create = kcm_create, .owner = THIS_MODULE, diff --git a/net/key/af_key.c b/net/key/af_key.c index b1432b668033..ca9d3ae665e7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } @@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, sock_hold(sk); if (*skb2 == NULL) { - if (atomic_read(&skb->users) != 1) { + if (refcount_read(&skb->users) != 1) { *skb2 = skb_clone(skb, allocation); } else { *skb2 = skb; - atomic_inc(&skb->users); + refcount_inc(&skb->users); } } if (*skb2 != NULL) { @@ -349,7 +349,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) err = EINVAL; BUG_ON(err <= 0 || err >= 256); - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = (uint8_t) err; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / @@ -810,12 +810,12 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, return ERR_PTR(-ENOBUFS); /* call should fill header later */ - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? 
*/ hdr->sadb_msg_len = size / sizeof(uint64_t); /* sa */ - sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa)); + sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; @@ -862,8 +862,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, /* hard time */ if (hsc & 2) { - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; @@ -874,8 +873,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* soft time */ if (hsc & 1) { - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; @@ -885,8 +883,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds; } /* current time */ - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; @@ -895,8 +892,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, lifetime->sadb_lifetime_addtime = x->curlft.add_time; lifetime->sadb_lifetime_usetime = x->curlft.use_time; /* src address */ - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -915,8 +911,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, BUG(); /* dst address */ - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -933,8 +928,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) { - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, + sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -951,8 +946,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, /* auth key */ if (add_keys && auth_key_size) { - key = (struct sadb_key *) skb_put(skb, - sizeof(struct sadb_key)+auth_key_size); + key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_AUTH; @@ -962,8 +956,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* encrypt key */ if (add_keys && encrypt_key_size) { - key = (struct sadb_key *) skb_put(skb, - sizeof(struct sadb_key)+encrypt_key_size); + key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size); key->sadb_key_len = (sizeof(struct 
sadb_key) + encrypt_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT; @@ -974,7 +967,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* sa */ - sa2 = (struct sadb_x_sa2 *) skb_put(skb, sizeof(struct sadb_x_sa2)); + sa2 = skb_put(skb, sizeof(struct sadb_x_sa2)); sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t); sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2; if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) { @@ -992,7 +985,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, struct sadb_x_nat_t_port *n_port; /* type */ - n_type = (struct sadb_x_nat_t_type*) skb_put(skb, sizeof(*n_type)); + n_type = skb_put(skb, sizeof(*n_type)); n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t); n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; n_type->sadb_x_nat_t_type_type = natt->encap_type; @@ -1001,14 +994,14 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, n_type->sadb_x_nat_t_type_reserved[2] = 0; /* source port */ - n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port)); + n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* dest port */ - n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port)); + n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = natt->encap_dport; @@ -1017,8 +1010,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, /* security context */ if (xfrm_ctx) { - sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, - sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx = skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; @@ -1626,7 +1619,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, if (!skb) goto out_put_algs; - hdr = (struct sadb_msg *) skb_put(skb, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = 0; hdr->sadb_msg_len = len / sizeof(uint64_t); @@ -1635,7 +1628,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct sadb_supported *sp; struct sadb_alg *ap; - sp = (struct sadb_supported *) skb_put(skb, auth_len); + sp = skb_put(skb, auth_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = auth_len / sizeof(uint64_t); @@ -1656,7 +1649,7 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct sadb_supported *sp; struct sadb_alg *ap; - sp = (struct sadb_supported *) skb_put(skb, enc_len); + sp = skb_put(skb, enc_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = enc_len / sizeof(uint64_t); @@ -1715,8 +1708,7 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) if (!skb) return -ENOBUFS; - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); - memcpy(hdr, ihdr, sizeof(struct sadb_msg)); + hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg)); hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1731,7 +1723,7 @@ static int 
key_notify_sa_flush(const struct km_event *c) skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; @@ -2056,12 +2048,11 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * size = pfkey_xfrm_policy2msg_size(xp); /* call should fill header later */ - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ /* src address */ - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -2076,8 +2067,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * BUG(); /* dst address */ - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -2091,8 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * xp->family); /* hard time */ - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; @@ -2101,8 +2090,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds; /* soft time */ - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; @@ -2111,8 +2099,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds; /* current time */ - lifetime = (struct sadb_lifetime *) skb_put(skb, - sizeof(struct sadb_lifetime)); + lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; @@ -2121,7 +2108,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * lifetime->sadb_lifetime_addtime = xp->curlft.add_time; lifetime->sadb_lifetime_usetime = xp->curlft.use_time; - pol = (struct sadb_x_policy *) skb_put(skb, sizeof(struct sadb_x_policy)); + pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD; @@ -2149,7 +2136,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * } else { size -= 2*socklen; } - rq = (void*)skb_put(skb, req_size); + rq = skb_put(skb, req_size); 
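The long run of af_key.c hunks here is mechanical fallout from two tree-wide cleanups in this cycle: skb_put() and friends now return void * (so the explicit casts go away), and the new helpers skb_put_data() and skb_put_zero() fold the trailing memcpy()/memset() into the tailroom reservation. A before/after sketch of the equivalence, using a hypothetical reply-building helper rather than any real pfkey function:

    #include <linux/skbuff.h>
    #include <linux/string.h>
    #include <linux/pfkeyv2.h>

    static struct sadb_msg *reply_hdr_old(struct sk_buff *skb,
    				      const struct sadb_msg *ihdr)
    {
    	struct sadb_msg *hdr;

    	/* pre-4.13 pattern: reserve tailroom, cast, then copy by hand */
    	hdr = (struct sadb_msg *)skb_put(skb, sizeof(*hdr));
    	memcpy(hdr, ihdr, sizeof(*hdr));
    	return hdr;
    }

    static struct sadb_msg *reply_hdr_new(struct sk_buff *skb,
    				      const struct sadb_msg *ihdr)
    {
    	/* skb_put_data(skb, p, n) == memcpy(skb_put(skb, n), p, n),
    	 * skb_put_zero(skb, n)    == memset(skb_put(skb, n), 0, n),
    	 * and skb_put() itself now returns void *, so no cast.
    	 */
    	return skb_put_data(skb, ihdr, sizeof(struct sadb_msg));
    }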
pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); rq->sadb_x_ipsecrequest_len = req_size; @@ -2179,7 +2166,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * if ((xfrm_ctx = xp->security)) { int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); - sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size); + sec_ctx = skb_put(skb, ctx_size); sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; @@ -2190,7 +2177,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * } hdr->sadb_msg_len = size / sizeof(uint64_t); - hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); + hdr->sadb_msg_reserved = refcount_read(&xp->refcnt); return 0; } @@ -2611,7 +2598,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net); + kma ? &k : NULL, net, NULL); out: return err; @@ -2743,7 +2730,7 @@ static int key_notify_policy_flush(const struct km_event *c) skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb_out) return -ENOBUFS; - hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); + hdr = skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; @@ -2929,7 +2916,7 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) struct sadb_prop *p; int i; - p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop)); + p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; @@ -2945,8 +2932,7 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; - c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); - memset(c, 0, sizeof(*c)); + c = skb_put_zero(skb, sizeof(struct sadb_comb)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; @@ -2964,7 +2950,7 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) struct sadb_prop *p; int i, k; - p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop)); + p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; @@ -2990,7 +2976,7 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; - c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); + c = skb_put(skb, sizeof(struct sadb_comb)); memset(c, 0, sizeof(*c)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; @@ -3157,7 +3143,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct if (skb == NULL) return -ENOMEM; - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_ACQUIRE; hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); @@ -3168,8 +3154,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct hdr->sadb_msg_pid = 0; /* src address */ - addr = (struct sadb_address*) skb_put(skb, - 
sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -3184,8 +3169,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct BUG(); /* dst address */ - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -3199,7 +3183,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct if (!addr->sadb_address_prefixlen) BUG(); - pol = (struct sadb_x_policy *) skb_put(skb, sizeof(struct sadb_x_policy)); + pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; @@ -3216,8 +3200,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct /* security context */ if (xfrm_ctx) { - sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, - sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx = skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; @@ -3359,7 +3343,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (skb == NULL) return -ENOMEM; - hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING; hdr->sadb_msg_satype = satype; @@ -3370,7 +3354,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, hdr->sadb_msg_pid = 0; /* SA */ - sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa)); + sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; @@ -3381,8 +3365,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, sa->sadb_sa_flags = 0; /* ADDRESS_SRC (old addr) */ - addr = (struct sadb_address*) - skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -3397,15 +3380,14 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, BUG(); /* NAT_T_SPORT (old port) */ - n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port)); + n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* ADDRESS_DST (new addr) */ - addr = (struct sadb_address*) - skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); + addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); @@ -3420,7 +3402,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, BUG(); /* NAT_T_DPORT (new port) */ - n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port)); + n_port = 
skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = sport; @@ -3434,7 +3416,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, const struct xfrm_selector *sel) { struct sadb_address *addr; - addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); + addr = skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; addr->sadb_address_proto = sel->proto; @@ -3472,8 +3454,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * size_req = (sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(family)); - kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); - memset(kma, 0, size_req); + kma = skb_put_zero(skb, size_req); kma->sadb_x_kmaddress_len = size_req / 8; kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; kma->sadb_x_kmaddress_reserved = k->reserved; @@ -3499,8 +3480,7 @@ static int set_ipsecrequest(struct sk_buff *skb, size_req = sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(family); - rq = (struct sadb_x_ipsecrequest *)skb_put(skb, size_req); - memset(rq, 0, size_req); + rq = skb_put_zero(skb, size_req); rq->sadb_x_ipsecrequest_len = size_req; rq->sadb_x_ipsecrequest_proto = proto; rq->sadb_x_ipsecrequest_mode = mode; @@ -3519,7 +3499,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k) + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap) { int i; int sasize_sel; @@ -3568,7 +3549,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, if (skb == NULL) return -ENOMEM; - hdr = (struct sadb_msg *)skb_put(skb, sizeof(struct sadb_msg)); + hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_MIGRATE; hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); @@ -3589,7 +3570,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); /* policy information */ - pol = (struct sadb_x_policy *)skb_put(skb, sizeof(struct sadb_x_policy)); + pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = size_pol / 8; pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; @@ -3629,7 +3610,8 @@ err: #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k) + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } @@ -3757,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fa0342574b89..b0c2d4ae781d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -132,12 +132,12 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) */ static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel 
*tunnel) { - atomic_inc(&tunnel->ref_count); + refcount_inc(&tunnel->ref_count); } static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) { - if (atomic_dec_and_test(&tunnel->ref_count)) + if (refcount_dec_and_test(&tunnel->ref_count)) l2tp_tunnel_free(tunnel); } #ifdef L2TP_REFCNT_DEBUG @@ -145,14 +145,14 @@ static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) do { \ pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \ __func__, __LINE__, (_t)->name, \ - atomic_read(&_t->ref_count)); \ + refcount_read(&_t->ref_count)); \ l2tp_tunnel_inc_refcount_1(_t); \ } while (0) #define l2tp_tunnel_dec_refcount(_t) \ do { \ pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \ __func__, __LINE__, (_t)->name, \ - atomic_read(&_t->ref_count)); \ + refcount_read(&_t->ref_count)); \ l2tp_tunnel_dec_refcount_1(_t); \ } while (0) #else @@ -1353,7 +1353,7 @@ static void l2tp_udp_encap_destroy(struct sock *sk) */ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - BUG_ON(atomic_read(&tunnel->ref_count) != 0); + BUG_ON(refcount_read(&tunnel->ref_count) != 0); BUG_ON(tunnel->sock != NULL); l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); kfree_rcu(tunnel, rcu); @@ -1667,7 +1667,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Bump the reference count. The tunnel context is deleted * only when this drops to zero. Must be done before list insertion */ - l2tp_tunnel_inc_refcount(tunnel); + refcount_set(&tunnel->ref_count, 1); spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); @@ -1706,7 +1706,7 @@ void l2tp_session_free(struct l2tp_session *session) { struct l2tp_tunnel *tunnel = session->tunnel; - BUG_ON(atomic_read(&session->ref_count) != 0); + BUG_ON(refcount_read(&session->ref_count) != 0); if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); @@ -1854,7 +1854,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Bump the reference count. The session context is deleted * only when this drops to zero. */ - l2tp_session_inc_refcount(session); + refcount_set(&session->ref_count, 1); l2tp_tunnel_inc_refcount(tunnel); /* Ensure tunnel socket isn't deleted */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index eec5ad2ebb93..cdb6e3327f74 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
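One subtlety of the l2tp conversion above: refcount_t deliberately refuses to increment from zero, so the creation paths swap l2tp_tunnel_inc_refcount()/l2tp_session_inc_refcount() for an explicit refcount_set(..., 1) on the freshly allocated object. A minimal sketch of the lifetime the refcount_t API enforces, with struct obj as a hypothetical stand-in for the tunnel or session:

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct obj {
    	refcount_t ref;
    	/* ... payload ... */
    };

    static struct obj *obj_alloc(void)
    {
    	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

    	if (o)
    		refcount_set(&o->ref, 1);	/* not refcount_inc(): 0 -> 1 would WARN */
    	return o;
    }

    static void obj_get(struct obj *o)
    {
    	refcount_inc(&o->ref);		/* saturates instead of wrapping on overflow */
    }

    static void obj_put(struct obj *o)
    {
    	if (refcount_dec_and_test(&o->ref))
    		kfree(o);
    }

Unlike a raw atomic_t counter, these primitives WARN and saturate rather than wrap, and they trap underflow and increment-from-zero, which is the safety argument behind the tree-wide networking refcount_t conversions in this merge.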
*/ +#include <linux/refcount.h> #ifndef _L2TP_CORE_H_ #define _L2TP_CORE_H_ @@ -98,7 +99,7 @@ struct l2tp_session { int nr_oos_count; /* For OOS recovery */ int nr_oos_count_max; struct hlist_node hlist; /* Hash list node */ - atomic_t ref_count; + refcount_t ref_count; char name[32]; /* for logging */ char ifname[IFNAMSIZ]; @@ -177,7 +178,7 @@ struct l2tp_tunnel { struct list_head list; /* Keep a list of all tunnels */ struct net *l2tp_net; /* the net we belong to */ - atomic_t ref_count; + refcount_t ref_count; #ifdef CONFIG_DEBUG_FS void (*show)(struct seq_file *m, void *arg); #endif @@ -273,12 +274,12 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); */ static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session) { - atomic_inc(&session->ref_count); + refcount_inc(&session->ref_count); } static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) { - if (atomic_dec_and_test(&session->ref_count)) + if (refcount_dec_and_test(&session->ref_count)) l2tp_session_free(session); } @@ -287,14 +288,14 @@ static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) do { \ pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \ __func__, __LINE__, (_s)->name, \ - atomic_read(&_s->ref_count)); \ + refcount_read(&_s->ref_count)); \ l2tp_session_inc_refcount_1(_s); \ } while (0) #define l2tp_session_dec_refcount(_s) \ do { \ pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \ __func__, __LINE__, (_s)->name, \ - atomic_read(&_s->ref_count)); \ + refcount_read(&_s->ref_count)); \ l2tp_session_dec_refcount_1(_s); \ } while (0) #else diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index d100aed3d06f..53bae54c4d6e 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" : ""); seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count, - tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, - atomic_read(&tunnel->ref_count)); - + tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0, + refcount_read(&tunnel->ref_count)); seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, atomic_long_read(&tunnel->stats.tx_packets), @@ -171,7 +170,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) ""); if (session->send_seq || session->recv_seq) seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns); - seq_printf(m, " refcnt %d\n", atomic_read(&session->ref_count)); + seq_printf(m, " refcnt %d\n", refcount_read(&session->ref_count)); seq_printf(m, " config %d/%d/%c/%c/%s/%s %08x %u\n", session->mtu, session->mru, session->recv_seq ? 'R' : '-', diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 32ea0f3d868c..f0edb7209079 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1616,7 +1616,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) seq_printf(m, "\nTUNNEL '%s', %c %d\n", tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 
'Y' : 'N', - atomic_read(&tunnel->ref_count) - 1); + refcount_read(&tunnel->ref_count) - 1); seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, atomic_long_read(&tunnel->stats.tx_packets), diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index b50b64ac8815..e15314e3b464 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -54,12 +54,12 @@ static void lapb_free_cb(struct lapb_cb *lapb) static __inline__ void lapb_hold(struct lapb_cb *lapb) { - atomic_inc(&lapb->refcnt); + refcount_inc(&lapb->refcnt); } static __inline__ void lapb_put(struct lapb_cb *lapb) { - if (atomic_dec_and_test(&lapb->refcnt)) + if (refcount_dec_and_test(&lapb->refcnt)) lapb_free_cb(lapb); } @@ -136,7 +136,7 @@ static struct lapb_cb *lapb_create_cb(void) lapb->mode = LAPB_DEFAULT_MODE; lapb->window = LAPB_DEFAULT_WINDOW; lapb->state = LAPB_STATE_0; - atomic_set(&lapb->refcnt, 1); + refcount_set(&lapb->refcnt, 1); out: return lapb; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 9b02c13d258b..5e91b47f0d2a 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -507,7 +507,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_estab_match(sap, daddr, laddr, rc))) { @@ -566,7 +566,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_listener_match(sap, laddr, rc))) { @@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk) skb_queue_purge(&sk->sk_write_queue); skb_queue_purge(&llc->pdu_unack_q); #ifdef LLC_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) { + if (refcount_read(&sk->sk_refcnt) != 1) { printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n", - sk, __func__, atomic_read(&sk->sk_refcnt)); + sk, __func__, refcount_read(&sk->sk_refcnt)); printk(KERN_DEBUG "%d LLC sockets are still alive\n", atomic_read(&llc_sock_nr)); } else { diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 842851cef698..89041260784c 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -41,7 +41,7 @@ static struct llc_sap *llc_sap_alloc(void) spin_lock_init(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); - atomic_set(&sap->refcnt, 1); + refcount_set(&sap->refcnt, 1); } return sap; } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 63b6ab056370..d90928f50226 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -329,7 +329,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc))) { diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 1b7a4daf283c..8708cbe8af5b 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -213,8 +213,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d return; skb_reserve(skb, local->hw.extra_tx_headroom); 
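The llc_conn.c and llc_sap.c hunks above keep their existing lookup dance and only swap the pinning step to refcount_inc_not_zero(). The dance itself is the standard pattern for SLAB_TYPESAFE_BY_RCU caches, where a slot can be freed and reused for a different socket while the walker holds only rcu_read_lock(). A sketch, with table_find(), key_match() and obj_put() as hypothetical stand-ins:

    #include <linux/rcupdate.h>
    #include <linux/refcount.h>

    struct obj {
    	refcount_t ref;
    	/* identity fields checked by key_match() ... */
    };

    static struct obj *table_find(const void *key);	/* hypothetical RCU-safe walk */
    static bool key_match(const struct obj *o, const void *key);
    static void obj_put(struct obj *o);

    static struct obj *obj_lookup(const void *key)
    {
    	struct obj *o;

    	rcu_read_lock();
    again:
    	o = table_find(key);
    	if (o) {
    		/* The slot may be mid-free, so only a zero-safe
    		 * increment is allowed to pin it ...
    		 */
    		if (!refcount_inc_not_zero(&o->ref))
    			goto again;
    		/* ... and since SLAB_TYPESAFE_BY_RCU can recycle the memory
    		 * for a new object, re-check the identity after taking the
    		 * reference.
    		 */
    		if (!key_match(o, key)) {
    			obj_put(o);
    			goto again;
    		}
    	}
    	rcu_read_unlock();
    	return o;
    }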
- mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); + mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -449,44 +448,21 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, buf_size, true, false); } -void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid) +void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, + const u8 *addr, unsigned int bit) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; - struct ieee80211_rx_agg *rx_agg; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) - return; - - rx_agg = (struct ieee80211_rx_agg *) &skb->cb; - memcpy(&rx_agg->addr, addr, ETH_ALEN); - rx_agg->tid = tid; - - skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_START; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); -} -EXPORT_SYMBOL(ieee80211_start_rx_ba_session_offl); - -void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_rx_agg *rx_agg; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) - return; + struct sta_info *sta; - rx_agg = (struct ieee80211_rx_agg *) &skb->cb; - memcpy(&rx_agg->addr, addr, ETH_ALEN); - rx_agg->tid = tid; + rcu_read_lock(); + sta = sta_info_get_bss(sdata, addr); + if (!sta) + goto unlock; - skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_STOP; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); + set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + unlock: + rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_stop_rx_ba_session_offl); +EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index cf2392b2ac71..cbd48762256c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -76,8 +76,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, return; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); + mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -125,8 +124,7 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) return; skb_reserve(skb, local->hw.extra_tx_headroom); - bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); - memset(bar, 0, sizeof(*bar)); + bar = skb_put_zero(skb, sizeof(*bar)); bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); memcpy(bar->ra, ra, ETH_ALEN); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4a388fe8c2d1..a354f1939e49 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1107,7 +1107,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) skb = dev_alloc_skb(sizeof(*msg)); if (!skb) return; - msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg)); + msg = skb_put(skb, sizeof(*msg)); /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ @@ -1876,6 +1876,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->user_mpm = setup->user_mpm; 
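The agg-rx.c rework above (continued in the ht.c and iface.c hunks below) drops the old trick of queueing zero-size skbs onto sdata->skb_queue to request RX aggregation start/stop from driver context. Instead, ieee80211_manage_rx_ba_offl() records the request as a bit in sta->ampdu_mlme.tid_rx_manage_offl (start requests in the low IEEE80211_NUM_TIDS bits, stops offset by IEEE80211_NUM_TIDS) and kicks the aggregation work. A generic sketch of that defer-via-bitmap idiom, with all names hypothetical:

    #include <linux/bitops.h>
    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    #define N_TIDS 16			/* stands in for IEEE80211_NUM_TIDS */

    struct agg_state {
    	/* bits [0, N_TIDS): start requests; [N_TIDS, 2*N_TIDS): stop requests */
    	unsigned long pending[BITS_TO_LONGS(2 * N_TIDS)];
    	struct work_struct work;	/* INIT_WORK(&st->work, agg_work) at setup */
    };

    static void start_rx_agg(unsigned int tid) { /* hypothetical */ }
    static void stop_rx_agg(unsigned int tid) { /* hypothetical */ }

    /* Callable from contexts that may not sleep: mark the request, kick the worker. */
    static void manage_rx_ba_offl(struct agg_state *st, unsigned int bit)
    {
    	set_bit(bit, st->pending);
    	schedule_work(&st->work);
    }

    /* Process context: test_and_clear_bit() consumes each request exactly once,
     * even if it was re-queued while the worker was already running.
     */
    static void agg_work(struct work_struct *work)
    {
    	struct agg_state *st = container_of(work, struct agg_state, work);
    	unsigned int tid;

    	for (tid = 0; tid < N_TIDS; tid++) {
    		if (test_and_clear_bit(tid, st->pending))
    			start_rx_agg(tid);
    		if (test_and_clear_bit(tid + N_TIDS, st->pending))
    			stop_rx_agg(tid);
    	}
    }

In the real code the worker side is ieee80211_ba_session_work(), as the ht.c hunk below shows.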
ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; + ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) @@ -3413,7 +3414,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, skb_reserve(skb, local->hw.extra_tx_headroom); - nullfunc = (void *) skb_put(skb, size); + nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 8f5fff8b2040..c813207bb123 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -330,8 +330,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); - hdr = (struct ieee80211_hdr *) skb_put(skb, 24); - memset(hdr, 0, 24); + hdr = skb_put_zero(skb, 24); fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { @@ -367,7 +366,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( * The exact contents does not matter since the recipient is required * to drop this because of the Michael MIC failure. */ - memset(skb_put(skb, 50), 0, 50); + skb_put_zero(skb, 50); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 42601820db20..b15412c21ac9 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -154,6 +154,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, p += scnprintf(p, bufsz+buf-p, + "target %uus interval %uus ecn %s\n", + codel_time_to_us(sta->cparams.target), + codel_time_to_us(sta->cparams.interval), + sta->cparams.ecn ? 
"yes" : "no"); + p += scnprintf(p, + bufsz+buf-p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 6ca5442b1e03..c92df492e898 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -331,6 +331,18 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); + if (test_and_clear_bit(tid, + sta->ampdu_mlme.tid_rx_manage_offl)) + __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, + IEEE80211_MAX_AMPDU_BUF, + false, true); + + if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, + sta->ampdu_mlme.tid_rx_manage_offl)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + 0, false); + spin_lock_bh(&sta->lock); tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; @@ -382,8 +394,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, return; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); + mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -448,7 +459,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); - action_frame = (void *)skb_put(skb, 27); + action_frame = skb_put(skb, 27); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 364d4e137649..e9c6aa3ed05b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -808,7 +808,6 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, } memset(¶ms, 0, sizeof(params)); - memset(&csa_ie, 0, sizeof(csa_ie)); err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, sta_flags, ifibss->bssid, &csa_ie); @@ -1570,7 +1569,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, return; skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len); + skb_put_data(skb, presp->head, presp->head_len); memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN); ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5e002f62c235..2197c62a0a6e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -643,6 +643,8 @@ struct ieee80211_if_mesh { unsigned long wrkq_flags; unsigned long mbss_changed; + bool userspace_handles_dfs; + u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; /* Active Path Selection Protocol Identifier */ @@ -1029,17 +1031,6 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif) return shift; } -struct ieee80211_rx_agg { - u8 addr[ETH_ALEN]; - u16 tid; -}; - -enum sdata_queue_type { - IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, - IEEE80211_SDATA_QUEUE_RX_AGG_START = 3, - IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4, -}; - enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, @@ -1432,6 +1423,7 @@ struct ieee80211_csa_ie { u8 count; u8 ttl; u16 pre_value; + u16 reason_code; }; /* Parsed Information Elements */ @@ -2057,6 +2049,8 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5e002f62c235..2197c62a0a6e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -643,6 +643,8 @@ struct ieee80211_if_mesh {
 	unsigned long wrkq_flags;
 	unsigned long mbss_changed;
 
+	bool userspace_handles_dfs;
+
 	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
 	size_t mesh_id_len;
 	/* Active Path Selection Protocol Identifier */
@@ -1029,17 +1031,6 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif)
 	return shift;
 }
 
-struct ieee80211_rx_agg {
-	u8 addr[ETH_ALEN];
-	u16 tid;
-};
-
-enum sdata_queue_type {
-	IEEE80211_SDATA_QUEUE_TYPE_FRAME	= 0,
-	IEEE80211_SDATA_QUEUE_RX_AGG_START	= 3,
-	IEEE80211_SDATA_QUEUE_RX_AGG_STOP	= 4,
-};
-
 enum {
 	IEEE80211_RX_MSG	= 1,
 	IEEE80211_TX_STATUS_MSG	= 2,
@@ -1432,6 +1423,7 @@ struct ieee80211_csa_ie {
 	u8 count;
 	u8 ttl;
 	u16 pre_value;
+	u16 reason_code;
 };
 
 /* Parsed Information Elements */
@@ -2057,6 +2049,8 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			       const struct cfg80211_chan_def *chandef,
 			       u16 prot_mode, bool rifs_mode);
+void ieee80211_ie_build_wide_bw_cs(u8 *pos,
+				   const struct cfg80211_chan_def *chandef);
 u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 			       u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f5f50150ba1c..9228ac73c429 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1237,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work)
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct sta_info *sta;
-	struct ieee80211_rx_agg *rx_agg;
 
 	if (!ieee80211_sdata_running(sdata))
 		return;
@@ -1252,28 +1251,8 @@ static void ieee80211_iface_work(struct work_struct *work)
 	while ((skb = skb_dequeue(&sdata->skb_queue))) {
 		struct ieee80211_mgmt *mgmt = (void *)skb->data;
 
-		if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
-			rx_agg = (void *)&skb->cb;
-			mutex_lock(&local->sta_mtx);
-			sta = sta_info_get_bss(sdata, rx_agg->addr);
-			if (sta)
-				__ieee80211_start_rx_ba_session(sta,
-						0, 0, 0, 1, rx_agg->tid,
-						IEEE80211_MAX_AMPDU_BUF,
-						false, true);
-			mutex_unlock(&local->sta_mtx);
-		} else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_STOP) {
-			rx_agg = (void *)&skb->cb;
-			mutex_lock(&local->sta_mtx);
-			sta = sta_info_get_bss(sdata, rx_agg->addr);
-			if (sta)
-				__ieee80211_stop_rx_ba_session(sta,
-							rx_agg->tid,
-							WLAN_BACK_RECIPIENT, 0,
-							false);
-			mutex_unlock(&local->sta_mtx);
-		} else if (ieee80211_is_action(mgmt->frame_control) &&
-			   mgmt->u.action.category == WLAN_CATEGORY_BACK) {
+		if (ieee80211_is_action(mgmt->frame_control) &&
+		    mgmt->u.action.category == WLAN_CATEGORY_BACK) {
 			int len = skb->len;
 
 			mutex_lock(&local->sta_mtx);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 737e1f082b0d..a550c707cd8a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -345,7 +345,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata,
 		data = ifmsh->ie + offset;
 		if (skb_tailroom(skb) < len)
 			return -ENOMEM;
-		memcpy(skb_put(skb, len), data, len);
+		skb_put_data(skb, data, len);
 	}
 
 	return 0;
@@ -369,7 +369,7 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 	if (skb_tailroom(skb) < len)
 		return -ENOMEM;
-	memcpy(skb_put(skb, len), data, len);
+	skb_put_data(skb, data, len);
 
 	return 0;
 }
@@ -690,6 +690,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		   2 + sizeof(struct ieee80211_channel_sw_ie) +
 		   /* Mesh Channel Switch Parameters */
 		   2 + sizeof(struct ieee80211_mesh_chansw_params_ie) +
+		   /* Channel Switch Wrapper + Wide Bandwidth CSA IE */
+		   2 + 2 + sizeof(struct ieee80211_wide_bw_chansw_ie) +
+		   2 + sizeof(struct ieee80211_sec_chan_offs_ie) +
 		   2 + 8 + /* supported rates */
 		   2 + 3; /* DS params */
 	tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) +
@@ -716,8 +719,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 	bcn->head = ((u8 *) bcn) + sizeof(*bcn);
 
 	/* fill in the head */
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
-	memset(mgmt, 0, hdr_len);
+	mgmt = skb_put_zero(skb, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_BEACON);
 	eth_broadcast_addr(mgmt->da);
@@ -736,8 +738,12 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 	rcu_read_lock();
 	csa = rcu_dereference(ifmsh->csa);
 	if (csa) {
-		pos = skb_put(skb, 13);
-		memset(pos, 0, 13);
+		enum nl80211_channel_type ct;
+		struct cfg80211_chan_def *chandef;
+		int ie_len = 2 + sizeof(struct ieee80211_channel_sw_ie) +
+			     2 + sizeof(struct ieee80211_mesh_chansw_params_ie);
+
+		pos = skb_put_zero(skb, ie_len);
 		*pos++ = WLAN_EID_CHANNEL_SWITCH;
 		*pos++ = 3;
 		*pos++ = 0x0;
@@ -760,6 +766,37 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		pos += 2;
 		put_unaligned_le16(ifmsh->pre_value, pos);
 		pos += 2;
+
+		switch (csa->settings.chandef.width) {
+		case NL80211_CHAN_WIDTH_40:
+			ie_len = 2 + sizeof(struct ieee80211_sec_chan_offs_ie);
+			pos = skb_put_zero(skb, ie_len);
+
+			*pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */
+			*pos++ = 1;				    /* len */
+			ct = cfg80211_get_chandef_type(&csa->settings.chandef);
+			if (ct == NL80211_CHAN_HT40PLUS)
+				*pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+			else
+				*pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW;
+			break;
+		case NL80211_CHAN_WIDTH_80:
+		case NL80211_CHAN_WIDTH_80P80:
+		case NL80211_CHAN_WIDTH_160:
+			/* Channel Switch Wrapper + Wide Bandwidth CSA IE */
+			ie_len = 2 + 2 +
+				 sizeof(struct ieee80211_wide_bw_chansw_ie);
+			pos = skb_put_zero(skb, ie_len);
+
+			*pos++ = WLAN_EID_CHANNEL_SWITCH_WRAPPER; /* EID */
+			*pos++ = 5;				  /* len */
+			/* put sub IE */
+			chandef = &csa->settings.chandef;
+			ieee80211_ie_build_wide_bw_cs(pos, chandef);
+			break;
+		default:
+			break;
+		}
 	}
 	rcu_read_unlock();
 
@@ -916,6 +953,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	ieee80211_configure_filter(local);
 }
 
+static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
+{
+	int err;
+
+	/* if the current channel is a DFS channel, mark the channel as
+	 * unavailable.
+	 */
+	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
+					    &sdata->vif.bss_conf.chandef,
+					    NL80211_IFTYPE_MESH_POINT);
+	if (err > 0)
+		cfg80211_radar_event(sdata->local->hw.wiphy,
+				     &sdata->vif.bss_conf.chandef, GFP_ATOMIC);
+}
+
 static bool
 ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 				 struct ieee802_11_elems *elems, bool beacon)
@@ -933,19 +985,20 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 	if (!sband)
 		return false;
 
-	sta_flags = IEEE80211_STA_DISABLE_VHT;
+	sta_flags = 0;
 	switch (sdata->vif.bss_conf.chandef.width) {
 	case NL80211_CHAN_WIDTH_20_NOHT:
 		sta_flags |= IEEE80211_STA_DISABLE_HT;
 	case NL80211_CHAN_WIDTH_20:
 		sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+	case NL80211_CHAN_WIDTH_40:
+		sta_flags |= IEEE80211_STA_DISABLE_VHT;
 		break;
 	default:
 		break;
 	}
 
 	memset(&params, 0, sizeof(params));
-	memset(&csa_ie, 0, sizeof(csa_ie));
 	err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
 					   sta_flags, sdata->vif.addr,
 					   &csa_ie);
@@ -954,11 +1007,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 	if (err)
 		return false;
 
+	/* Mark the channel unavailable if the reason for the switch is
+	 * regulatory.
+	 */
+	if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY)
+		ieee80211_mesh_csa_mark_radar(sdata);
+
 	params.chandef = csa_ie.chandef;
 	params.count = csa_ie.count;
 
 	if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
-				     IEEE80211_CHAN_DISABLED)) {
+				     IEEE80211_CHAN_DISABLED) ||
+	    !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+				     NL80211_IFTYPE_MESH_POINT)) {
 		sdata_info(sdata,
 			   "mesh STA %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n",
 			   sdata->vif.addr,
@@ -974,9 +1035,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 					    NL80211_IFTYPE_MESH_POINT);
 	if (err < 0)
 		return false;
-	if (err > 0)
-		/* TODO: DFS not (yet) supported */
+	if (err > 0 && !ifmsh->userspace_handles_dfs) {
+		sdata_info(sdata,
+			   "mesh STA %pM switches to channel requiring DFS (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n",
+			   sdata->vif.addr,
+			   params.chandef.chan->center_freq,
+			   params.chandef.width,
+			   params.chandef.center_freq1,
+			   params.chandef.center_freq2);
 		return false;
+	}
 
 	params.radar_required = err;
 
@@ -1057,8 +1125,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
 		goto out;
 
 	skb_reserve(presp, local->tx_headroom);
-	memcpy(skb_put(presp, bcn->head_len), bcn->head, bcn->head_len);
-	memcpy(skb_put(presp, bcn->tail_len), bcn->tail, bcn->tail_len);
+	skb_put_data(presp, bcn->head, bcn->head_len);
+	skb_put_data(presp, bcn->tail, bcn->tail_len);
 	hdr = (struct ieee80211_mgmt *) presp->data;
 	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					 IEEE80211_STYPE_PROBE_RESP);
@@ -1197,7 +1265,7 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
 	if (!skb)
 		return -ENOMEM;
 	skb_reserve(skb, local->tx_headroom);
-	mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len);
+	mgmt_fwd = skb_put(skb, len);
 
 	/* offset_ttl is based on whether the secondary channel
 	 * offset is available or not. Subtract 1 from the mesh TTL
@@ -1233,7 +1301,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
 	pos = mgmt->u.action.u.chan_switch.variable;
 	baselen = offsetof(struct ieee80211_mgmt,
 			   u.action.u.chan_switch.variable);
-	ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+	ieee802_11_parse_elems(pos, len - baselen, true, &elems);
 
 	ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
 	if (!--ifmsh->chsw_ttl)
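The new mesh channel-switch logic makes two decisions: if the peer's reason code says the switch is regulatory, the current channel is reported as radar-afflicted, and a switch onto a DFS channel is only followed when userspace has opted in to handle DFS. A compact sketch of that decision flow under illustrative names — the kernel asks cfg80211_chandef_dfs_required() and fires cfg80211_radar_event():

	#include <stdbool.h>

	enum dfs_need { CHAN_NO_DFS = 0, CHAN_NEEDS_DFS = 1 };

	static bool mesh_accept_chanswitch(enum dfs_need target_dfs,
					   bool reason_is_regulatory,
					   bool userspace_handles_dfs,
					   bool *mark_radar)
	{
		/* regulatory reason: flag the current channel as unusable */
		*mark_radar = reason_is_regulatory;

		/* a DFS target is only acceptable when userspace opted in */
		if (target_dfs == CHAN_NEEDS_DFS && !userspace_handles_dfs)
			return false;
		return true;
	}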
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4005edd71fe8..d8bbd0d2225a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -120,8 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	if (!skb)
 		return -1;
 	skb_reserve(skb, local->tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
-	memset(mgmt, 0, hdr_len);
+	mgmt = skb_put_zero(skb, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
@@ -257,8 +256,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
 	if (!skb)
 		return -1;
 	skb_reserve(skb, local->tx_headroom + sdata->encrypt_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
-	memset(mgmt, 0, hdr_len);
+	mgmt = skb_put_zero(skb, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1131cd504a15..f69c6c38ca43 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -242,8 +242,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		return err;
 	info = IEEE80211_SKB_CB(skb);
 	skb_reserve(skb, local->tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
-	memset(mgmt, 0, hdr_len);
+	mgmt = skb_put_zero(skb, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
 	memcpy(mgmt->da, da, ETH_ALEN);
@@ -264,8 +263,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		band = sband->band;
 
 		/* capability info */
-		pos = skb_put(skb, 2);
-		memset(pos, 0, 2);
+		pos = skb_put_zero(skb, 2);
 		if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
 			/* AID */
 			pos = skb_put(skb, 2);
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 90a268abea17..d8cd91424175 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -30,7 +30,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
 		return NULL;
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (struct ieee80211_hdr *) skb_put(skb, size);
+	nullfunc = skb_put(skb, size);
 	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
 	ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr,
 				      sdata->vif.addr);
@@ -39,7 +39,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
 	nullfunc->seq_ctrl = 0;
 	/* no address resolution for this frame -> set addr 1 immediately */
 	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
-	memset(skb_put(skb, 2), 0, 2); /* append QoS control field */
+	skb_put_zero(skb, 2); /* append QoS control field */
 	ieee80211_mps_set_frame_flags(sdata, sta, nullfunc);
 
 	return skb;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cc8e6ea1b27e..b588e593b0ec 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -674,8 +674,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
 		capab |= WLAN_CAPABILITY_RADIO_MEASURE;
 
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
+	mgmt = skb_put_zero(skb, 24);
 	memcpy(mgmt->da, assoc_data->bss->bssid, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(mgmt->bssid, assoc_data->bss->bssid, ETH_ALEN);
@@ -797,8 +796,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 					     after_ric,
 					     ARRAY_SIZE(after_ric),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -835,8 +833,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
 					     before_vht, ARRAY_SIZE(before_vht),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -849,8 +846,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		noffset = ieee80211_ie_split_vendor(assoc_data->ie,
 						    assoc_data->ie_len,
 						    offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -869,8 +865,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	/* add any remaining custom (i.e. vendor specific here) IEs */
 	if (assoc_data->ie_len) {
 		noffset = assoc_data->ie_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
 	}
 
 	if (assoc_data->fils_kek_len &&
@@ -949,8 +944,7 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (struct ieee80211_hdr *) skb_put(skb, 30);
-	memset(nullfunc, 0, 30);
+	nullfunc = skb_put_zero(skb, 30);
 	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
 			 IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 	nullfunc->frame_control = fc;
@@ -1122,7 +1116,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	current_band = cbss->channel->band;
-	memset(&csa_ie, 0, sizeof(csa_ie));
 	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
 					   ifmgd->flags,
 					   ifmgd->associated->bssid, &csa_ie);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index eede5c6db8d5..f8e7a8bbc618 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -885,8 +885,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	data = skb_put(skb, params->len);
-	memcpy(data, params->buf, params->len);
+	data = skb_put_data(skb, params->buf, params->len);
 
 	/* Update CSA counters */
 	if (sdata->vif.csa_active &&
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index ea1f4315c521..76f303fda3ed 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -943,6 +943,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
 
 	drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
 
+	ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
+
 	return 0;
 }
 EXPORT_SYMBOL(rate_control_set_rates);
@@ -991,4 +993,3 @@ void rate_control_deinitialize(struct ieee80211_local *local)
 	local->rate_ctrl = NULL;
 	rate_control_free(local, ref);
 }
-
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3674fe3d67dc..70e9d2ca8bbe 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -237,7 +237,6 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
 	if (!skb)
 		return;
 
-	skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
 	skb_queue_tail(&sdata->skb_queue, skb);
 	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
@@ -274,7 +273,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)))
 		mpdulen += FCS_LEN;
 
-	rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
+	rthdr = skb_push(skb, rtap_len);
 	memset(rthdr, 0, rtap_len - rtap.len - rtap.pad);
 	it_present = &rthdr->it_present;
@@ -1217,7 +1216,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
-		skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
 		skb_queue_tail(&rx->sdata->skb_queue, skb);
 		ieee80211_queue_work(&local->hw, &rx->sdata->work);
 		return;
@@ -2100,7 +2098,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		}
 	}
 	while ((skb = __skb_dequeue(&entry->skb_list))) {
-		memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len);
+		skb_put_data(rx->skb, skb->data, skb->len);
 		dev_kfree_skb(skb);
 	}
@@ -2762,8 +2760,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	resp = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(resp, 0, 24);
+	resp = skb_put_zero(skb, 24);
 	memcpy(resp->da, mgmt->sa, ETH_ALEN);
 	memcpy(resp->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN);
@@ -3104,7 +3101,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 
  queue:
-	rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
 	skb_queue_tail(&sdata->skb_queue, rx->skb);
 	ieee80211_queue_work(&local->hw, &sdata->work);
 	if (rx->sta)
@@ -3250,7 +3246,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 	}
 
 	/* queue up frame and kick off work to process it */
-	rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
 	skb_queue_tail(&sdata->skb_queue, rx->skb);
 	ieee80211_queue_work(&rx->local->hw, &sdata->work);
 	if (rx->sta)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 0782e486fe89..ee0181778a42 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -36,6 +36,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
 	int secondary_channel_offset = -1;
 
+	memset(csa_ie, 0, sizeof(*csa_ie));
+
 	sec_chan_offs = elems->sec_chan_offs;
 	wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
@@ -76,6 +78,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 		csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags;
 		csa_ie->pre_value = le16_to_cpu(
 				elems->mesh_chansw_params_ie->mesh_pre_value);
+
+		if (elems->mesh_chansw_params_ie->mesh_flags &
+				WLAN_EID_CHAN_SWITCH_PARAM_REASON)
+			csa_ie->reason_code = le16_to_cpu(
+				elems->mesh_chansw_params_ie->mesh_reason);
 	}
 
 	new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
@@ -186,8 +193,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
 		return;
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
-	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
-	memset(msr_report, 0, 24);
+	msr_report = skb_put_zero(skb, 24);
 	memcpy(msr_report->da, da, ETH_ALEN);
 	memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(msr_report->bssid, bssid, ETH_ALEN);
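ieee80211_parse_ch_switch_ie() now zeroes the caller's csa_ie itself, which lets the three call sites (IBSS, mesh, managed) drop their own memset() and guarantees that a newly added field such as reason_code can never be read stale. A generic sketch of the convention, with hypothetical names:

	#include <string.h>

	struct parsed_csa { int chan; unsigned short reason_code; };

	/* zero the out-parameter first so optional fields default safely */
	static int parse_csa(struct parsed_csa *out, const unsigned char *ie,
			     unsigned int len)
	{
		memset(out, 0, sizeof(*out));
		if (len < 1)
			return -1;
		out->chan = ie[0];	/* fill only what the element carries */
		return 0;
	}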
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 403e3cc58b57..69615016d5bf 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -20,6 +20,7 @@
 #include <linux/timer.h>
 #include <linux/rtnetlink.h>
 
+#include <net/codel.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -425,6 +426,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 	sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA;
 
+	sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD;
+	sta->cparams.target = MS2TIME(20);
+	sta->cparams.interval = MS2TIME(100);
+	sta->cparams.ecn = true;
+
 	sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
 
 	return sta;
@@ -1306,7 +1312,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (void *) skb_put(skb, size);
+	nullfunc = skb_put(skb, size);
 	nullfunc->frame_control = fc;
 	nullfunc->duration_id = 0;
 	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
@@ -2310,3 +2316,27 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
 		return stats->last_rx;
 	return sta->status_stats.last_ack;
 }
+
+static void sta_update_codel_params(struct sta_info *sta, u32 thr)
+{
+	if (!sta->sdata->local->ops->wake_tx_queue)
+		return;
+
+	if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) {
+		sta->cparams.target = MS2TIME(50);
+		sta->cparams.interval = MS2TIME(300);
+		sta->cparams.ecn = false;
+	} else {
+		sta->cparams.target = MS2TIME(20);
+		sta->cparams.interval = MS2TIME(100);
+		sta->cparams.ecn = true;
+	}
+}
+
+void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
+					   u32 thr)
+{
+	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+
+	sta_update_codel_params(sta, thr);
+}
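sta_update_codel_params() is the consumer of the expected-throughput hook added in rate.c above: stations whose expected throughput falls below STA_SLOW_THRESHOLD scaled by the station count get a laxer CoDel target/interval and lose ECN marking, so slow stations are not starved by drops. A userspace sketch of the same computation (units assumed to be kbit/s, matching the 6000 == 6 Mbps definition in sta_info.h below):

	#include <stdbool.h>
	#include <stdint.h>

	#define STA_SLOW_THRESHOLD 6000 /* kbit/s, scaled by station count */

	struct codel_sketch { uint32_t target_ms, interval_ms; bool ecn; };

	static void update_codel(struct codel_sketch *cp, uint32_t thr,
				 uint32_t num_sta)
	{
		if (thr && thr < STA_SLOW_THRESHOLD * num_sta) {
			cp->target_ms = 50;	/* tolerate more standing queue */
			cp->interval_ms = 300;
			cp->ecn = false;
		} else {
			cp->target_ms = 20;	/* default fq_codel-style values */
			cp->interval_ms = 100;
			cp->ecn = true;
		}
	}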
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ea0747d6a6da..3acbdfa9f649 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -233,6 +233,8 @@ struct tid_ampdu_rx {
  *	RX timer expired until the work for it runs
  * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
  *	driver requested to close until the work for it runs
+ * @tid_rx_manage_offl: bitmap indicating which BA sessions were requested
+ *	to be treated as started/stopped due to offloading
  * @agg_session_valid: bitmap indicating which TID has a rx BA session open on
  * @unexpected_agg: bitmap indicating which TID already sent a delBA due to
  *	unexpected aggregation related frames outside a session
@@ -250,6 +252,7 @@ struct sta_ampdu_mlme {
 	u8 tid_rx_token[IEEE80211_NUM_TIDS];
 	unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
+	unsigned long tid_rx_manage_offl[BITS_TO_LONGS(2 * IEEE80211_NUM_TIDS)];
 	unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	/* tx */
@@ -396,6 +399,14 @@ struct ieee80211_sta_rx_stats {
 };
 
 /**
+ * The bandwidth threshold below which the per-station CoDel parameters will be
+ * scaled to be more lenient (to prevent starvation of slow stations). This
+ * value will be scaled by the number of active stations when it is being
+ * applied.
+ */
+#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
+
+/**
  * struct sta_info - STA information
  *
  * This structure collects information about a station that
@@ -448,6 +459,7 @@ struct ieee80211_sta_rx_stats {
  * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for
  *	AP only.
  * @cipher_scheme: optional cipher scheme for this station
+ * @cparams: CoDel parameters for this station.
  * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
  * @fast_tx: TX fastpath information
  * @fast_rx: RX fastpath information
@@ -551,6 +563,8 @@ struct sta_info {
 	enum ieee80211_smps_mode known_smps_mode;
 	const struct ieee80211_cipher_scheme *cipher_scheme;
 
+	struct codel_params cparams;
+
 	u8 reserved_tid;
 
 	struct cfg80211_chan_def tdls_chandef;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index be47ac5cd8c8..da7427a41529 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -288,7 +288,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 	unsigned char *pos;
 	u16 txflags;
 
-	rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len);
+	rthdr = skb_push(skb, rtap_len);
 	memset(rthdr, 0, rtap_len);
 
 	rthdr->it_len = cpu_to_le16(rtap_len);
@@ -546,6 +546,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
 		skb->wifi_acked_valid = 1;
 		skb->wifi_acked = acked;
 	}
+
+	ieee80211_led_tx(local);
 }
 
 /*
@@ -823,8 +825,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 		}
 	}
 
-	ieee80211_led_tx(local);
-
 	/* SNMP counters
 	 * Fragments are passed to low-level drivers as separate skbs, so these
 	 * are actually fragments, not frames. Update frame counters only for
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f20dcf1b1830..91093d4a2f84 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -49,7 +49,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
 			  !ifmgd->tdls_wider_bw_prohibited;
 	struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata);
 	bool vht = sband && sband->vht_cap.vht_supported;
-	u8 *pos = (void *)skb_put(skb, 10);
+	u8 *pos = skb_put(skb, 10);
 
 	*pos++ = WLAN_EID_EXT_CAPABILITY;
 	*pos++ = 8; /* len */
@@ -168,7 +168,7 @@ static void ieee80211_tdls_add_oper_classes(struct ieee80211_sub_if_data *sdata,
 
 static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
 {
-	u8 *pos = (void *)skb_put(skb, 3);
+	u8 *pos = skb_put(skb, 3);
 
 	*pos++ = WLAN_EID_BSS_COEX_2040;
 	*pos++ = 1; /* len */
@@ -209,7 +209,7 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
 		rsp_addr = sdata->vif.addr;
 	}
 
-	lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
+	lnkid = skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
 
 	lnkid->ie_type = WLAN_EID_LINK_ID;
 	lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
@@ -223,7 +223,7 @@ static void
 ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	u8 *pos = (void *)skb_put(skb, 4);
+	u8 *pos = skb_put(skb, 4);
 
 	*pos++ = WLAN_EID_AID;
 	*pos++ = 2; /* len */
@@ -271,8 +271,7 @@ static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_tx_queue_params *txq;
 	int i;
 
-	wmm = (void *)skb_put(skb, sizeof(*wmm));
-	memset(wmm, 0, sizeof(*wmm));
+	wmm = skb_put_zero(skb, sizeof(*wmm));
 
 	wmm->element_id = WLAN_EID_VENDOR_SPECIFIC;
 	wmm->len = sizeof(*wmm) - 2;
@@ -389,8 +388,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ext_cap,
 					     ARRAY_SIZE(before_ext_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -419,8 +417,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ht_cap,
 					     ARRAY_SIZE(before_ht_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -491,8 +488,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 					     before_vht_cap,
 					     ARRAY_SIZE(before_vht_cap),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -533,8 +529,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 }
@@ -576,8 +571,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 					     before_qos,
 					     ARRAY_SIZE(before_qos),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -597,8 +591,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 					     before_ht_op,
 					     ARRAY_SIZE(before_ht_op),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -639,8 +632,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 }
@@ -653,7 +645,6 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_tdls_data *tf;
 	size_t offset = 0, noffset;
-	u8 *pos;
 
 	if (WARN_ON_ONCE(!chandef))
 		return;
@@ -671,8 +662,7 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata,
 					     before_lnkie,
 					     ARRAY_SIZE(before_lnkie),
 					     offset);
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 		offset = noffset;
 	}
@@ -681,8 +671,7 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata,
 	/* add any remaining IEs */
 	if (extra_ies_len) {
 		noffset = extra_ies_len;
-		pos = skb_put(skb, noffset - offset);
-		memcpy(pos, extra_ies + offset, noffset - offset);
+		skb_put_data(skb, extra_ies + offset, noffset - offset);
 	}
 }
@@ -697,7 +686,7 @@ ieee80211_tdls_add_chan_switch_resp_ies(struct ieee80211_sub_if_data *sdata,
 		ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
 
 	if (extra_ies_len)
-		memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+		skb_put_data(skb, extra_ies, extra_ies_len);
 }
 
 static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata,
@@ -727,8 +716,7 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata,
 	case WLAN_TDLS_TEARDOWN:
 	case WLAN_TDLS_DISCOVERY_REQUEST:
 		if (extra_ies_len)
-			memcpy(skb_put(skb, extra_ies_len), extra_ies,
-			       extra_ies_len);
+			skb_put_data(skb, extra_ies, extra_ies_len);
 		if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN)
 			ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
 		break;
@@ -756,7 +744,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_tdls_data *tf;
 
-	tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+	tf = skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
 
 	memcpy(tf->da, peer, ETH_ALEN);
 	memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
@@ -838,8 +826,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_mgmt *mgmt;
 
-	mgmt = (void *)skb_put(skb, 24);
-	memset(mgmt, 0, 24);
+	mgmt = skb_put_zero(skb, 24);
 	memcpy(mgmt->da, peer, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 0d645bc148d0..3d9ac17af407 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -92,16 +92,19 @@
 	__field(u16, ssn)						\
 	__field(u8, buf_size)						\
 	__field(bool, amsdu)						\
-	__field(u16, timeout)
+	__field(u16, timeout)						\
+	__field(u16, action)
 #define AMPDU_ACTION_ASSIGN	STA_NAMED_ASSIGN(params->sta);		\
 				__entry->tid = params->tid;		\
 				__entry->ssn = params->ssn;		\
 				__entry->buf_size = params->buf_size;	\
 				__entry->amsdu = params->amsdu;		\
-				__entry->timeout = params->timeout;
-#define AMPDU_ACTION_PR_FMT	STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d"
+				__entry->timeout = params->timeout;	\
+				__entry->action = params->action;
+#define AMPDU_ACTION_PR_FMT	STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d action %d"
 #define AMPDU_ACTION_PR_ARG	STA_PR_ARG, __entry->tid, __entry->ssn,	\
-				__entry->buf_size, __entry->amsdu, __entry->timeout
+				__entry->buf_size, __entry->amsdu, __entry->timeout, \
+				__entry->action
 
 /*
  * Tracing for driver callbacks.
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 04b22f8982fe..8858f4f185e9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -903,8 +903,8 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
 		tmp->dev = skb->dev;
 
 		/* copy header and data */
-		memcpy(skb_put(tmp, hdrlen), skb->data, hdrlen);
-		memcpy(skb_put(tmp, fraglen), skb->data + pos, fraglen);
+		skb_put_data(tmp, skb->data, hdrlen);
+		skb_put_data(tmp, skb->data + pos, fraglen);
 
 		pos += fraglen;
 	}
@@ -1340,9 +1340,16 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
 	local = container_of(fq, struct ieee80211_local, fq);
 	txqi = container_of(tin, struct txq_info, tin);
-	cparams = &local->cparams;
 	cstats = &txqi->cstats;
 
+	if (txqi->txq.sta) {
+		struct sta_info *sta = container_of(txqi->txq.sta,
+						    struct sta_info, sta);
+		cparams = &sta->cparams;
+	} else {
+		cparams = &local->cparams;
+	}
+
 	if (flow == &txqi->def_flow)
 		cvars = &txqi->def_cvars;
 	else
@@ -2701,7 +2708,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	if (ieee80211_is_data_qos(fc)) {
 		__le16 *qos_control;
 
-		qos_control = (__le16 *) skb_push(skb, 2);
+		qos_control = skb_push(skb, 2);
 		memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2);
 		/*
 		 * Maybe we could actually set some fields here, for now just
@@ -3037,7 +3044,7 @@ static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
 
 	if (padding) {
 		*subframe_len += padding;
-		memset(skb_put(skb, padding), 0, padding);
+		skb_put_zero(skb, padding);
 	}
 
 	return true;
@@ -3340,7 +3347,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 	}
 
 	memcpy(&eth, skb->data, ETH_HLEN - 2);
-	hdr = (void *)skb_push(skb, extra_head);
+	hdr = skb_push(skb, extra_head);
 	memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
 	memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
 	memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
@@ -3867,7 +3874,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 		ps->dtim_count--;
 	}
 
-	tim = pos = (u8 *) skb_put(skb, 6);
+	tim = pos = skb_put(skb, 6);
 	*pos++ = WLAN_EID_TIM;
 	*pos++ = 4;
 	*pos++ = ps->dtim_count;
@@ -4125,8 +4132,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 				goto out;
 
 			skb_reserve(skb, local->tx_headroom);
-			memcpy(skb_put(skb, beacon->head_len), beacon->head,
-			       beacon->head_len);
+			skb_put_data(skb, beacon->head, beacon->head_len);
 
 			ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
 						 is_template);
@@ -4140,8 +4146,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 			}
 
 			if (beacon->tail)
-				memcpy(skb_put(skb, beacon->tail_len),
-				       beacon->tail, beacon->tail_len);
+				skb_put_data(skb, beacon->tail,
+					     beacon->tail_len);
 		} else
 			goto out;
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
@@ -4164,8 +4170,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!skb)
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
-		memcpy(skb_put(skb, beacon->head_len), beacon->head,
-		       beacon->head_len);
+		skb_put_data(skb, beacon->head, beacon->head_len);
 
 		hdr = (struct ieee80211_hdr *) skb->data;
 		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -4200,8 +4205,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!skb)
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
-		memcpy(skb_put(skb, beacon->head_len), beacon->head,
-		       beacon->head_len);
+		skb_put_data(skb, beacon->head, beacon->head_len);
 		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
 
 		if (offs) {
@@ -4209,8 +4213,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 			offs->tim_length = skb->len - beacon->head_len;
 		}
 
-		memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
-		       beacon->tail_len);
+		skb_put_data(skb, beacon->tail, beacon->tail_len);
 	} else {
 		WARN_ON(1);
 		goto out;
@@ -4330,7 +4333,7 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
 	if (!skb)
 		goto out;
 
-	memcpy(skb_put(skb, presp->len), presp->data, presp->len);
+	skb_put_data(skb, presp->data, presp->len);
 
 	hdr = (struct ieee80211_hdr *) skb->data;
 	memset(hdr->addr1, 0, sizeof(hdr->addr1));
@@ -4363,8 +4366,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll));
-	memset(pspoll, 0, sizeof(*pspoll));
+	pspoll = skb_put_zero(skb, sizeof(*pspoll));
 	pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 					    IEEE80211_STYPE_PSPOLL);
 	pspoll->aid = cpu_to_le16(ifmgd->aid);
@@ -4401,9 +4403,7 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb,
-							  sizeof(*nullfunc));
-	memset(nullfunc, 0, sizeof(*nullfunc));
+	nullfunc = skb_put_zero(skb, sizeof(*nullfunc));
 	nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
 					      IEEE80211_STYPE_NULLFUNC |
 					      IEEE80211_FCTL_TODS);
@@ -4435,8 +4435,7 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw,
 
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	hdr = (struct ieee80211_hdr_3addr *) skb_put(skb, sizeof(*hdr));
-	memset(hdr, 0, sizeof(*hdr));
+	hdr = skb_put_zero(skb, sizeof(*hdr));
 	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					 IEEE80211_STYPE_PROBE_REQ);
 	eth_broadcast_addr(hdr->addr1);
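The fq_tin_dequeue_func() hunk in tx.c is where the per-station parameters take effect: when the txq belongs to a station the dequeue path uses that station's cparams, otherwise it falls back to the device-wide set. The selection reduces to a sketch like this (the types are stand-ins for the kernel's):

	struct codel_params_sketch { unsigned int target_us, interval_us; };
	struct sta_sketch { struct codel_params_sketch cparams; };
	struct local_sketch { struct codel_params_sketch cparams; };

	/* per-station parameters when available, device-wide ones otherwise */
	static const struct codel_params_sketch *
	pick_cparams(const struct local_sketch *local, const struct sta_sketch *sta)
	{
		return sta ? &sta->cparams : &local->cparams;
	}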
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ac9ac6c35594..259698de569f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1242,8 +1242,7 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN);
 
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
-	memset(mgmt, 0, 24 + 6);
+	mgmt = skb_put_zero(skb, 24 + 6);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_AUTH);
 	memcpy(mgmt->da, da, ETH_ALEN);
@@ -1253,7 +1252,7 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
 	mgmt->u.auth.status_code = cpu_to_le16(status);
 	if (extra)
-		memcpy(skb_put(skb, extra_len), extra, extra_len);
+		skb_put_data(skb, extra, extra_len);
 
 	if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
 		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
@@ -1293,8 +1292,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 
 	/* copy in frame */
-	memcpy(skb_put(skb, IEEE80211_DEAUTH_FRAME_LEN),
-	       mgmt, IEEE80211_DEAUTH_FRAME_LEN);
+	skb_put_data(skb, mgmt, IEEE80211_DEAUTH_FRAME_LEN);
 
 	if (sdata->vif.type != NL80211_IFTYPE_STATION ||
 	    !(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
@@ -2414,6 +2412,35 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 	return pos + sizeof(struct ieee80211_ht_operation);
 }
 
+void ieee80211_ie_build_wide_bw_cs(u8 *pos,
+				   const struct cfg80211_chan_def *chandef)
+{
+	*pos++ = WLAN_EID_WIDE_BW_CHANNEL_SWITCH;	/* EID */
+	*pos++ = 3;					/* IE length */
+	/* New channel width */
+	switch (chandef->width) {
+	case NL80211_CHAN_WIDTH_80:
+		*pos++ = IEEE80211_VHT_CHANWIDTH_80MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		*pos++ = IEEE80211_VHT_CHANWIDTH_160MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_80P80:
+		*pos++ = IEEE80211_VHT_CHANWIDTH_80P80MHZ;
+		break;
+	default:
+		*pos++ = IEEE80211_VHT_CHANWIDTH_USE_HT;
+	}
+
+	/* new center frequency segment 0 */
+	*pos++ = ieee80211_frequency_to_channel(chandef->center_freq1);
+	/* new center frequency segment 1 */
+	if (chandef->center_freq2)
+		*pos++ = ieee80211_frequency_to_channel(chandef->center_freq2);
+	else
+		*pos++ = 0;
+}
+
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 				const struct cfg80211_chan_def *chandef)
 {
@@ -2964,13 +2991,13 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
 	skb = dev_alloc_skb(local->tx_headroom + hdr_len +
 			    5 + /* channel switch announcement element */
 			    3 + /* secondary channel offset element */
+			    5 + /* wide bandwidth channel switch announcement */
 			    8); /* mesh channel switch parameters element */
 	if (!skb)
 		return -ENOMEM;
 
 	skb_reserve(skb, local->tx_headroom);
-	mgmt = (struct ieee80211_mgmt *)skb_put(skb, hdr_len);
-	memset(mgmt, 0, hdr_len);
+	mgmt = skb_put_zero(skb, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
 
@@ -3022,6 +3049,13 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
 		pos += 2;
 	}
 
+	if (csa_settings->chandef.width == NL80211_CHAN_WIDTH_80 ||
+	    csa_settings->chandef.width == NL80211_CHAN_WIDTH_80P80 ||
+	    csa_settings->chandef.width == NL80211_CHAN_WIDTH_160) {
+		skb_put(skb, 5);
+		ieee80211_ie_build_wide_bw_cs(pos, &csa_settings->chandef);
+	}
+
 	ieee80211_tx_skb(sdata, skb);
 	return 0;
 }
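ieee80211_ie_build_wide_bw_cs() always emits a fixed five bytes, which is why both the mesh beacon code and ieee80211_send_action_csa() above reserve 2 + 3 for it. A standalone sketch of the resulting element layout — the EID value 194 is taken from IEEE 802.11 and should be treated as illustrative here:

	#include <stdint.h>

	#define EID_WIDE_BW_CHANSWITCH 194	/* Wide Bandwidth Channel Switch */

	/* element: EID, length 3, new width, center-freq segment 0, segment 1 */
	static uint8_t *build_wide_bw_cs(uint8_t *pos, uint8_t vht_width,
					 uint8_t seg0_chan, uint8_t seg1_chan)
	{
		*pos++ = EID_WIDE_BW_CHANSWITCH;
		*pos++ = 3;
		*pos++ = vht_width;	/* e.g. 1 for 80 MHz in VHT terms */
		*pos++ = seg0_chan;	/* channel number of center frequency 0 */
		*pos++ = seg1_chan;	/* 0 unless 80+80 */
		return pos;
	}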
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index cc19614ff4e6..0d722ea98a1b 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -949,7 +949,7 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
@@ -993,7 +993,7 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
@@ -1138,7 +1138,7 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
 		return TX_DROP;
 
-	mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie));
+	mmie = skb_put(skb, sizeof(*mmie));
 	mmie->element_id = WLAN_EID_MMIE;
 	mmie->length = sizeof(*mmie) - 2;
 	mmie->key_id = cpu_to_le16(key->conf.keyidx);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b05fd1497ce..bdcfb2d04cd2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -684,6 +684,54 @@ errout:
 	return err;
 }
 
+static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
+		       u8 via_addr[], struct netlink_ext_ack *extack)
+{
+	struct rtvia *via = nla_data(nla);
+	int err = -EINVAL;
+	int alen;
+
+	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid attribute length for RTA_VIA");
+		goto errout;
+	}
+	alen = nla_len(nla) -
+			offsetof(struct rtvia, rtvia_addr);
+	if (alen > MAX_VIA_ALEN) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid address length for RTA_VIA");
+		goto errout;
+	}
+
+	/* Validate the address family */
+	switch (via->rtvia_family) {
+	case AF_PACKET:
+		*via_table = NEIGH_LINK_TABLE;
+		break;
+	case AF_INET:
+		*via_table = NEIGH_ARP_TABLE;
+		if (alen != 4)
+			goto errout;
+		break;
+	case AF_INET6:
+		*via_table = NEIGH_ND_TABLE;
+		if (alen != 16)
+			goto errout;
+		break;
+	default:
+		/* Unsupported address family */
+		goto errout;
+	}
+
+	memcpy(via_addr, via->rtvia_addr, alen);
+	*via_alen = alen;
+	err = 0;
+
+errout:
+	return err;
+}
+
 static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
 				  struct mpls_route *rt)
 {
@@ -695,8 +743,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
 	if (!nh)
 		return -ENOMEM;
 
-	err = -EINVAL;
-
 	nh->nh_labels = cfg->rc_output_labels;
 	for (i = 0; i < nh->nh_labels; i++)
 		nh->nh_label[i] = cfg->rc_output_label[i];
@@ -720,7 +766,8 @@ errout:
 
 static int mpls_nh_build(struct net *net, struct mpls_route *rt,
 			 struct mpls_nh *nh, int oif, struct nlattr *via,
-			 struct nlattr *newdst, u8 max_labels)
+			 struct nlattr *newdst, u8 max_labels,
+			 struct netlink_ext_ack *extack)
 {
 	int err = -ENOMEM;
 
@@ -728,15 +775,15 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
 		goto errout;
 
 	if (newdst) {
-		err = nla_get_labels(newdst, max_labels,
-				     &nh->nh_labels, nh->nh_label);
+		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
+				     nh->nh_label, extack);
 		if (err)
 			goto errout;
 	}
 
 	if (via) {
 		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
-				  __mpls_nh_via(rt, nh));
+				  __mpls_nh_via(rt, nh), extack);
 		if (err)
 			goto errout;
 	} else {
@@ -782,7 +829,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
 
 		nla = nla_find(attrs, attrlen, RTA_NEWDST);
 		if (nla &&
-		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
+				   NULL, NULL) != 0)
 			return 0;
 
 		*max_labels = max_t(u8, *max_labels, n_labels);
@@ -802,7 +850,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
 }
 
 static int mpls_nh_build_multi(struct mpls_route_config *cfg,
-			       struct mpls_route *rt, u8 max_labels)
+			       struct mpls_route *rt, u8 max_labels,
+			       struct netlink_ext_ack *extack)
 {
 	struct rtnexthop *rtnh = cfg->rc_mp;
 	struct nlattr *nla_via, *nla_newdst;
@@ -836,7 +885,7 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
 
 		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
 				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
-				    max_labels);
+				    max_labels, extack);
 		if (err)
 			goto errout;
 
@@ -855,7 +904,28 @@ errout:
 	return err;
 }
 
-static int mpls_route_add(struct mpls_route_config *cfg)
+static bool mpls_label_ok(struct net *net, unsigned int index,
+			  struct netlink_ext_ack *extack)
+{
+	/* Reserved labels may not be set */
+	if (index < MPLS_LABEL_FIRST_UNRESERVED) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
+		return false;
+	}
+
+	/* The full 20 bit range may not be supported. */
+	if (index >= net->mpls.platform_labels) {
+		NL_SET_ERR_MSG(extack,
+			       "Label >= configured maximum in platform_labels");
+		return false;
+	}
+
+	return true;
+}
+
+static int mpls_route_add(struct mpls_route_config *cfg,
+			  struct netlink_ext_ack *extack)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = cfg->rc_nlinfo.nl_net;
@@ -874,18 +944,15 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		index = find_free_label(net);
 	}
 
-	/* Reserved labels may not be set */
-	if (index < MPLS_LABEL_FIRST_UNRESERVED)
-		goto errout;
-
-	/* The full 20 bit range may not be supported. */
-	if (index >= net->mpls.platform_labels)
+	if (!mpls_label_ok(net, index, extack))
 		goto errout;
 
 	/* Append makes no sense with mpls */
 	err = -EOPNOTSUPP;
-	if (cfg->rc_nlflags & NLM_F_APPEND)
+	if (cfg->rc_nlflags & NLM_F_APPEND) {
+		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
 		goto errout;
+	}
 
 	err = -EEXIST;
 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -912,8 +979,10 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		nhs = 1;
 	}
 
-	if (nhs == 0)
+	if (nhs == 0) {
+		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
 		goto errout;
+	}
 
 	err = -ENOMEM;
 	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
@@ -927,7 +996,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
 
 	if (cfg->rc_mp)
-		err = mpls_nh_build_multi(cfg, rt, max_labels);
+		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
 	else
 		err = mpls_nh_build_from_cfg(cfg, rt);
 	if (err)
@@ -943,7 +1012,8 @@ errout:
 	return err;
 }
 
-static int mpls_route_del(struct mpls_route_config *cfg)
+static int mpls_route_del(struct mpls_route_config *cfg,
+			  struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->rc_nlinfo.nl_net;
 	unsigned index;
@@ -951,12 +1021,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
 
 	index = cfg->rc_label;
 
-	/* Reserved labels may not be removed */
-	if (index < MPLS_LABEL_FIRST_UNRESERVED)
-		goto errout;
-
-	/* The full 20 bit range may not be supported */
-	if (index >= net->mpls.platform_labels)
+	if (!mpls_label_ok(net, index, extack))
 		goto errout;
 
 	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
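mpls_label_ok() folds the two label sanity checks, previously duplicated in the add and delete paths, into one helper that also sets the extended-ack message. Its logic, as a self-contained sketch:

	#include <stdbool.h>

	#define MPLS_LABEL_FIRST_UNRESERVED 16	/* labels 0-15 are reserved */

	static bool label_ok(unsigned int index, unsigned int platform_labels,
			     const char **errmsg)
	{
		/* Reserved labels may not be used */
		if (index < MPLS_LABEL_FIRST_UNRESERVED) {
			*errmsg = "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher";
			return false;
		}
		/* The full 20 bit range may not be supported */
		if (index >= platform_labels) {
			*errmsg = "Label >= configured maximum in platform_labels";
			return false;
		}
		return true;
	}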
@@ -1541,8 +1606,8 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
 }
 EXPORT_SYMBOL_GPL(nla_put_labels);
 
-int nla_get_labels(const struct nlattr *nla,
-		   u8 max_labels, u8 *labels, u32 label[])
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
+		   u32 label[], struct netlink_ext_ack *extack)
 {
 	unsigned len = nla_len(nla);
 	struct mpls_shim_hdr *nla_label;
@@ -1553,13 +1618,18 @@ int nla_get_labels(const struct nlattr *nla,
 	/* len needs to be an even multiple of 4 (the label size). Number
 	 * of labels is a u8 so check for overflow.
 	 */
-	if (len & 3 || len / 4 > 255)
+	if (len & 3 || len / 4 > 255) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid length for labels attribute");
 		return -EINVAL;
+	}
 
 	/* Limit the number of new labels allowed */
 	nla_labels = len/4;
-	if (nla_labels > max_labels)
+	if (nla_labels > max_labels) {
+		NL_SET_ERR_MSG(extack, "Too many labels");
 		return -EINVAL;
+	}
 
 	/* when label == NULL, caller wants number of labels */
 	if (!label)
@@ -1574,8 +1644,29 @@ int nla_get_labels(const struct nlattr *nla,
 		/* Ensure the bottom of stack flag is properly set
 		 * and ttl and tc are both clear.
 		 */
-		if ((dec.bos != bos) || dec.ttl || dec.tc)
+		if (dec.ttl) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "TTL in label must be 0");
+			return -EINVAL;
+		}
+
+		if (dec.tc) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "Traffic class in label must be 0");
+			return -EINVAL;
+		}
+
+		if (dec.bos != bos) {
+			NL_SET_BAD_ATTR(extack, nla);
+			if (bos) {
+				NL_SET_ERR_MSG(extack,
+					       "BOS bit must be set in first label");
+			} else {
+				NL_SET_ERR_MSG(extack,
+					       "BOS bit can only be set in first label");
+			}
 			return -EINVAL;
+		}
 
 		switch (dec.label) {
 		case MPLS_LABEL_IMPLNULL:
@@ -1583,6 +1674,8 @@ int nla_get_labels(const struct nlattr *nla,
 			 * assign and distribute, but which never
 			 * actually appears in the encapsulation.
 			 */
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "Implicit NULL Label (3) can not be used in encapsulation");
 			return -EINVAL;
 		}
 
@@ -1594,50 +1687,10 @@ out:
 }
 EXPORT_SYMBOL_GPL(nla_get_labels);
 
-int nla_get_via(const struct nlattr *nla, u8 *via_alen,
-		u8 *via_table, u8 via_addr[])
-{
-	struct rtvia *via = nla_data(nla);
-	int err = -EINVAL;
-	int alen;
-
-	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
-		goto errout;
-	alen = nla_len(nla) -
-			offsetof(struct rtvia, rtvia_addr);
-	if (alen > MAX_VIA_ALEN)
-		goto errout;
-
-	/* Validate the address family */
-	switch (via->rtvia_family) {
-	case AF_PACKET:
-		*via_table = NEIGH_LINK_TABLE;
-		break;
-	case AF_INET:
-		*via_table = NEIGH_ARP_TABLE;
-		if (alen != 4)
-			goto errout;
-		break;
-	case AF_INET6:
-		*via_table = NEIGH_ND_TABLE;
-		if (alen != 16)
-			goto errout;
-		break;
-	default:
-		/* Unsupported address family */
-		goto errout;
-	}
-
-	memcpy(via_addr, via->rtvia_addr, alen);
-	*via_alen = alen;
-	err = 0;
-
-errout:
-	return err;
-}
-
-static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       struct mpls_route_config *cfg)
+static int rtm_to_route_config(struct sk_buff *skb,
+			       struct nlmsghdr *nlh,
+			       struct mpls_route_config *cfg,
+			       struct netlink_ext_ack *extack)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
"rtm_src_len must be 0 for MPLS"); goto errout; - if (rtm->rtm_tos != 0) + } + if (rtm->rtm_tos != 0) { + NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS"); goto errout; - if (rtm->rtm_table != RT_TABLE_MAIN) + } + if (rtm->rtm_table != RT_TABLE_MAIN) { + NL_SET_ERR_MSG(extack, + "MPLS only supports the main route table"); goto errout; + } /* Any value is acceptable for rtm_protocol */ /* As mpls uses destination specific addresses * (or source specific address in the case of multicast) * all addresses have universal scope. */ - if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) + if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) { + NL_SET_ERR_MSG(extack, + "Invalid route scope - MPLS only supports UNIVERSE"); goto errout; - if (rtm->rtm_type != RTN_UNICAST) + } + if (rtm->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, + "Invalid route type - MPLS only supports UNICAST"); goto errout; - if (rtm->rtm_flags != 0) + } + if (rtm->rtm_flags != 0) { + NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS"); goto errout; + } cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = rtm->rtm_protocol; @@ -1696,26 +1768,26 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_NEWDST: if (nla_get_labels(nla, MAX_NEW_LABELS, &cfg->rc_output_labels, - cfg->rc_output_label)) + cfg->rc_output_label, extack)) goto errout; break; case RTA_DST: { u8 label_count; if (nla_get_labels(nla, 1, &label_count, - &cfg->rc_label)) + &cfg->rc_label, extack)) goto errout; - /* Reserved labels may not be set */ - if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED) + if (!mpls_label_ok(cfg->rc_nlinfo.nl_net, + cfg->rc_label, extack)) goto errout; - break; } case RTA_VIA: { if (nla_get_via(nla, &cfg->rc_via_alen, - &cfg->rc_via_table, cfg->rc_via)) + &cfg->rc_via_table, cfg->rc_via, + extack)) goto errout; break; } @@ -1729,14 +1801,18 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, { u8 ttl_propagate = nla_get_u8(nla); - if (ttl_propagate > 1) + if (ttl_propagate > 1) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "RTA_TTL_PROPAGATE can only be 0 or 1"); goto errout; + } cfg->rc_ttl_propagate = ttl_propagate ? 
@@ -1729,14 +1801,18 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		{
 			u8 ttl_propagate = nla_get_u8(nla);
 
-			if (ttl_propagate > 1)
+			if (ttl_propagate > 1) {
+				NL_SET_ERR_MSG_ATTR(extack, nla,
+						    "RTA_TTL_PROPAGATE can only be 0 or 1");
 				goto errout;
+			}
 			cfg->rc_ttl_propagate = ttl_propagate ?
 				MPLS_TTL_PROP_ENABLED :
 				MPLS_TTL_PROP_DISABLED;
 			break;
 		}
 		default:
+			NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
 			/* Unsupported attribute */
 			goto errout;
 		}
@@ -1757,11 +1833,11 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!cfg)
 		return -ENOMEM;
 
-	err = rtm_to_route_config(skb, nlh, cfg);
+	err = rtm_to_route_config(skb, nlh, cfg, extack);
 	if (err < 0)
 		goto out;
 
-	err = mpls_route_del(cfg);
+	err = mpls_route_del(cfg, extack);
 
 out:
 	kfree(cfg);
@@ -1779,11 +1855,11 @@ static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!cfg)
 		return -ENOMEM;
 
-	err = rtm_to_route_config(skb, nlh, cfg);
+	err = rtm_to_route_config(skb, nlh, cfg, extack);
 	if (err < 0)
 		goto out;
 
-	err = mpls_route_add(cfg);
+	err = mpls_route_add(cfg, extack);
 
 out:
 	kfree(cfg);
@@ -1995,6 +2071,166 @@ errout:
 	rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
 }
 
+static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+			 struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	u32 portid = NETLINK_CB(in_skb).portid;
+	struct nlattr *tb[RTA_MAX + 1];
+	u32 labels[MAX_NEW_LABELS];
+	struct mpls_shim_hdr *hdr;
+	unsigned int hdr_size = 0;
+	struct net_device *dev;
+	struct mpls_route *rt;
+	struct rtmsg *rtm, *r;
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+	struct mpls_nh *nh;
+	int err = -EINVAL;
+	u32 in_label;
+	u8 n_labels;
+
+	err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX,
+			  rtm_mpls_policy, extack);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(in_nlh);
+
+	if (tb[RTA_DST]) {
+		u8 label_count;
+
+		if (nla_get_labels(tb[RTA_DST], 1, &label_count,
+				   &in_label, extack))
+			goto errout;
+
+		if (in_label < MPLS_LABEL_FIRST_UNRESERVED)
+			goto errout;
+	}
+
+	rt = mpls_route_input_rcu(net, in_label);
+	if (!rt) {
+		err = -ENETUNREACH;
+		goto errout;
+	}
+
+	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+		skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
+		if (!skb) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+
+		err = mpls_dump_route(skb, portid, in_nlh->nlmsg_seq,
+				      RTM_NEWROUTE, in_label, rt, 0);
+		if (err < 0) {
+			/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
+			WARN_ON(err == -EMSGSIZE);
+			goto errout_free;
+		}
+
+		return rtnl_unicast(skb, net, portid);
+	}
+
+	if (tb[RTA_NEWDST]) {
+		if (nla_get_labels(tb[RTA_NEWDST], MAX_NEW_LABELS, &n_labels,
+				   labels, extack) != 0) {
+			err = -EINVAL;
+			goto errout;
+		}
+
+		hdr_size = n_labels * sizeof(struct mpls_shim_hdr);
+	}
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	skb->protocol = htons(ETH_P_MPLS_UC);
+
+	if (hdr_size) {
+		bool bos;
+		int i;
+
+		if (skb_cow(skb, hdr_size)) {
+			err = -ENOBUFS;
+			goto errout_free;
+		}
+
+		skb_reserve(skb, hdr_size);
+		skb_push(skb, hdr_size);
+		skb_reset_network_header(skb);
+
+		/* Push new labels */
+		hdr = mpls_hdr(skb);
+		bos = true;
+		for (i = n_labels - 1; i >= 0; i--) {
+			hdr[i] = mpls_entry_encode(labels[i],
+						   1, 0, bos);
+			bos = false;
+		}
+	}
+
+	nh = mpls_select_multipath(rt, skb);
+	if (!nh) {
+		err = -ENETUNREACH;
+		goto errout_free;
+	}
+
+	if (hdr_size) {
+		skb_pull(skb, hdr_size);
+		skb_reset_network_header(skb);
+	}
+
+	nlh = nlmsg_put(skb, portid, in_nlh->nlmsg_seq,
+			RTM_NEWROUTE, sizeof(*r), 0);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto errout_free;
+	}
+
+	r = nlmsg_data(nlh);
+	r->rtm_family	= AF_MPLS;
+	r->rtm_dst_len	= 20;
+	r->rtm_src_len	= 0;
+	r->rtm_table	= RT_TABLE_MAIN;
+	r->rtm_type	= RTN_UNICAST;
+	r->rtm_scope	= RT_SCOPE_UNIVERSE;
+	r->rtm_protocol	= rt->rt_protocol;
+	r->rtm_flags	= 0;
+
+	if (nla_put_labels(skb, RTA_DST, 1, &in_label))
+		goto nla_put_failure;
+
+	if (nh->nh_labels &&
+	    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
+			   nh->nh_label))
+		goto nla_put_failure;
+
+	if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
+	    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
+			nh->nh_via_alen))
+		goto nla_put_failure;
+
+	dev = rtnl_dereference(nh->nh_dev);
+	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+
+	err = rtnl_unicast(skb, net, portid);
+errout:
+	return err;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	err = -EMSGSIZE;
+errout_free:
+	kfree_skb(skb);
+	return err;
+}
+
 static int resize_platform_label_table(struct net *net, size_t limit)
 {
 	size_t size = sizeof(struct mpls_route *) * limit;
@@ -2241,7 +2477,8 @@ static int __init mpls_init(void)
 
 	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
 	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
-	rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
+	rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
+		      NULL);
 	rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
 		      mpls_netconf_dump_devconf, NULL);
 	err = 0;
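mpls_getroute() answers single-route RTM_GETROUTE queries two ways: with RTM_F_FIB_MATCH it returns the matching FIB entry as-is, otherwise it simulates forwarding (optionally pushing an RTA_NEWDST label stack) and reports the nexthop the multipath selector would pick. A hedged userspace sketch of the request header — netlink socket setup and the RTA_DST label attribute are omitted, and kernel headers new enough to define RTM_F_FIB_MATCH are assumed:

	#include <linux/rtnetlink.h>
	#include <string.h>
	#include <sys/socket.h>

	static void fill_mpls_getroute(struct nlmsghdr *nlh, struct rtmsg *rtm,
				       int want_fib_match)
	{
		memset(nlh, 0, sizeof(*nlh));
		nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
		nlh->nlmsg_type = RTM_GETROUTE;
		nlh->nlmsg_flags = NLM_F_REQUEST;

		memset(rtm, 0, sizeof(*rtm));
		rtm->rtm_family = AF_MPLS;
		rtm->rtm_dst_len = 20;	/* MPLS labels are 20 bits */
		if (want_fib_match)	/* return the route, not a resolved nexthop */
			rtm->rtm_flags |= RTM_F_FIB_MATCH;
	}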
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 4db6a5971322..cf65aec2e551 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -203,9 +203,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
 int nla_put_labels(struct sk_buff *skb, int attrtype,  u8 labels,
 		   const u32 label[]);
 int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
-		   u32 label[]);
-int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
-		u8 via[]);
+		   u32 label[], struct netlink_ext_ack *extack);
 bool mpls_output_possible(const struct net_device *dev);
 unsigned int mpls_dev_mtu(const struct net_device *dev);
 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 369c7a23c86c..6e558a419f60 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -159,7 +159,8 @@ drop:
 
 static int mpls_build_state(struct nlattr *nla,
 			    unsigned int family, const void *cfg,
-			    struct lwtunnel_state **ts)
+			    struct lwtunnel_state **ts,
+			    struct netlink_ext_ack *extack)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
 	struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
@@ -168,17 +169,18 @@ static int mpls_build_state(struct nlattr *nla,
 	int ret;
 
 	ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
-			       mpls_iptunnel_policy, NULL);
+			       mpls_iptunnel_policy, extack);
 	if (ret < 0)
 		return ret;
 
-	if (!tb[MPLS_IPTUNNEL_DST])
+	if (!tb[MPLS_IPTUNNEL_DST]) {
+		NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing");
 		return -EINVAL;
-
+	}
 	/* determine number of labels */
-	if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
-			   MAX_NEW_LABELS, &n_labels, NULL))
+	if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+			   &n_labels, NULL, extack))
 		return -EINVAL;
 
 	newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
@@ -188,7 +190,8 @@ static int mpls_build_state(struct nlattr *nla,
 	tun_encap_info = mpls_lwtunnel_encap(newts);
 	ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
-			     &tun_encap_info->labels, tun_encap_info->label);
+			     &tun_encap_info->labels, tun_encap_info->label,
+			     extack);
 	if (ret)
 		goto errout;
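The common thread in the MPLS changes, and the ipset ones further below, is plumbing struct netlink_ext_ack through the request handlers so that each validation failure carries a human-readable message (and, via NL_SET_ERR_MSG_ATTR, the offending attribute) back to userspace instead of a bare -EINVAL. The pattern reduces to a sketch like this, with ext_ack_sketch standing in for the kernel structure:

	struct ext_ack_sketch { const char *msg; };	/* kernel: struct netlink_ext_ack */

	#define SET_ERR_MSG(ack, text) \
		do { if (ack) (ack)->msg = (text); } while (0)

	static int validate_rtm_flags(unsigned int flags,
				      struct ext_ack_sketch *extack)
	{
		if (flags != 0) {
			SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
			return -22;	/* -EINVAL */
		}
		return 0;
	}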
b/net/ncsi/ncsi-cmd.c @@ -66,8 +66,7 @@ static int ncsi_cmd_handler_default(struct sk_buff *skb, { struct ncsi_cmd_pkt *cmd; - cmd = (struct ncsi_cmd_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -78,8 +77,7 @@ static int ncsi_cmd_handler_sp(struct sk_buff *skb, { struct ncsi_cmd_sp_pkt *cmd; - cmd = (struct ncsi_cmd_sp_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->hw_arbitration = nca->bytes[0]; ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -91,8 +89,7 @@ static int ncsi_cmd_handler_dc(struct sk_buff *skb, { struct ncsi_cmd_dc_pkt *cmd; - cmd = (struct ncsi_cmd_dc_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->ald = nca->bytes[0]; ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -104,8 +101,7 @@ static int ncsi_cmd_handler_rc(struct sk_buff *skb, { struct ncsi_cmd_rc_pkt *cmd; - cmd = (struct ncsi_cmd_rc_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -116,8 +112,7 @@ static int ncsi_cmd_handler_ae(struct sk_buff *skb, { struct ncsi_cmd_ae_pkt *cmd; - cmd = (struct ncsi_cmd_ae_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mc_id = nca->bytes[0]; cmd->mode = htonl(nca->dwords[1]); ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -130,8 +125,7 @@ static int ncsi_cmd_handler_sl(struct sk_buff *skb, { struct ncsi_cmd_sl_pkt *cmd; - cmd = (struct ncsi_cmd_sl_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mode = htonl(nca->dwords[0]); cmd->oem_mode = htonl(nca->dwords[1]); ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -144,8 +138,7 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb, { struct ncsi_cmd_svf_pkt *cmd; - cmd = (struct ncsi_cmd_svf_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->vlan = htons(nca->words[0]); cmd->index = nca->bytes[2]; cmd->enable = nca->bytes[3]; @@ -159,8 +152,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb, { struct ncsi_cmd_ev_pkt *cmd; - cmd = (struct ncsi_cmd_ev_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mode = nca->bytes[0]; ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -173,8 +165,7 @@ static int ncsi_cmd_handler_sma(struct sk_buff *skb, struct ncsi_cmd_sma_pkt *cmd; int i; - cmd = (struct ncsi_cmd_sma_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); for (i = 0; i < 6; i++) cmd->mac[i] = nca->bytes[i]; cmd->index = nca->bytes[6]; @@ -189,8 +180,7 @@ static int ncsi_cmd_handler_ebf(struct sk_buff *skb, { struct ncsi_cmd_ebf_pkt *cmd; - cmd = (struct ncsi_cmd_ebf_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mode = htonl(nca->dwords[0]); ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -202,8 +192,7 @@ static int ncsi_cmd_handler_egmf(struct sk_buff *skb, { struct ncsi_cmd_egmf_pkt *cmd; - cmd = (struct ncsi_cmd_egmf_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mode = htonl(nca->dwords[0]); ncsi_cmd_build_header(&cmd->cmd.common, 
nca); @@ -215,8 +204,7 @@ static int ncsi_cmd_handler_snfc(struct sk_buff *skb, { struct ncsi_cmd_snfc_pkt *cmd; - cmd = (struct ncsi_cmd_snfc_pkt *)skb_put(skb, sizeof(*cmd)); - memset(cmd, 0, sizeof(*cmd)); + cmd = skb_put_zero(skb, sizeof(*cmd)); cmd->mode = nca->bytes[0]; ncsi_cmd_build_header(&cmd->cmd.common, nca); @@ -343,7 +331,7 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca) } /* Fill the ethernet header */ - eh = (struct ethhdr *)skb_push(nr->cmd, sizeof(*eh)); + eh = skb_push(nr->cmd, sizeof(*eh)); eh->h_proto = htons(ETH_P_NCSI); eth_broadcast_addr(eh->h_dest); eth_broadcast_addr(eh->h_source); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c9b78e7b342f..913380919301 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,10 +70,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o -nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o -nf_tables-objs += nft_lookup.o nft_dynset.o +nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \ + nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \ + nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index ba6a5516dc7c..e495b5e484b1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static int ip_set_create(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; @@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set) static int ip_set_destroy(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; @@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set) static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; @@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_rename(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *s; @@ -1155,7 +1160,8 @@ out: static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack 
*extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *from, *to; @@ -1428,7 +1434,8 @@ out: static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_header(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); const struct ip_set *set; @@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_protocol(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; struct nlmsghdr *nlh2; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 42c3e3ba1b94..3f09cdb42562 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, break; } case IPPROTO_SCTP: { - sctp_sctphdr_t _sh; - const sctp_sctphdr_t *sh; + struct sctphdr _sh; + const struct sctphdr *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); if (!sh) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index ad99c1ceea6f..e31956b58aba 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1037,9 +1037,9 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, */ static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) { - sctp_chunkhdr_t *sch, 
schunk; - sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t), - sizeof(schunk), &schunk); + struct sctp_chunkhdr *sch, schunk; + sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr), + sizeof(schunk), &schunk); if (sch == NULL) return 0; if (sch->type == SCTP_CID_ABORT) @@ -1070,9 +1070,9 @@ static inline bool is_new_conn(const struct sk_buff *skb, return th->syn; } case IPPROTO_SCTP: { - sctp_chunkhdr_t *sch, schunk; + struct sctp_chunkhdr *sch, schunk; - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), + sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return false; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 56f8e4b204ff..3ffad4adaddf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,16 +15,15 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_service *svc; - sctp_chunkhdr_t _schunkh, *sch; - sctp_sctphdr_t *sh, _sctph; + struct sctp_chunkhdr _schunkh, *sch; + struct sctphdr *sh, _sctph; __be16 _ports[2], *ports = NULL; if (likely(!ip_vs_iph_icmp(iph))) { sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); if (sh) { - sch = skb_header_pointer( - skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); + sch = skb_header_pointer(skb, iph->len + sizeof(_sctph), + sizeof(_schunkh), &_schunkh); if (sch && (sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs))) ports = &sh->source; @@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 1; } -static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, +static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph, unsigned int sctphoff) { sctph->checksum = sctp_compute_cksum(skb, sctphoff); @@ -88,7 +87,7 @@ static int sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { - sctp_sctphdr_t *sctph; + struct sctphdr *sctph; unsigned int sctphoff = iph->len; bool payload_csum = false; @@ -135,7 +134,7 @@ static int sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { - sctp_sctphdr_t *sctph; + struct sctphdr *sctph; unsigned int sctphoff = iph->len; bool payload_csum = false; @@ -378,7 +377,7 @@ static inline void set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { - sctp_chunkhdr_t _sctpch, *sch; + struct sctp_chunkhdr _sctpch, *sch; unsigned char chunk_type; int event, next_state; int ihl, cofs; @@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - cofs = ihl + sizeof(sctp_sctphdr_t); + cofs = ihl + sizeof(struct sctphdr); sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -410,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, (sch->type == SCTP_CID_COOKIE_ACK)) { int clen = ntohs(sch->length); - if (clen >= sizeof(sctp_chunkhdr_t)) { + if (clen >= sizeof(_sctpch)) { sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), sizeof(_sctpch), &_sctpch); if (sch && sch->type == SCTP_CID_ABORT) diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 03d2ccffa9fa..20edd589fe06 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ 
b/net/netfilter/nf_conntrack_amanda.c @@ -197,8 +197,8 @@ static void __exit nf_conntrack_amanda_fini(void) { int i; - nf_conntrack_helper_unregister(&amanda_helper[0]); - nf_conntrack_helper_unregister(&amanda_helper[1]); + nf_conntrack_helpers_unregister(amanda_helper, + ARRAY_SIZE(amanda_helper)); for (i = 0; i < ARRAY_SIZE(search); i++) textsearch_destroy(search[i].ts); } @@ -218,16 +218,12 @@ static int __init nf_conntrack_amanda_init(void) goto err1; } } - ret = nf_conntrack_helper_register(&amanda_helper[0]); + ret = nf_conntrack_helpers_register(amanda_helper, + ARRAY_SIZE(amanda_helper)); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&amanda_helper[1]); - if (ret < 0) - goto err2; return 0; -err2: - nf_conntrack_helper_unregister(&amanda_helper[0]); err1: while (--i >= 0) textsearch_destroy(search[i].ts); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e847dbaa0c6b..9979f46c81dc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1586,13 +1586,12 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct hlist_nulls_node *n; - int cpu; spinlock_t *lockp; for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { @@ -1604,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (net_eq(nf_ct_net(ct), net) && - iter(ct, data)) + if (iter(ct, data)) goto found; } } @@ -1614,51 +1612,150 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), cond_resched(); } + return NULL; +found: + atomic_inc(&ct->ct_general.use); + spin_unlock(lockp); + local_bh_enable(); + return ct; +} + +static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report) +{ + unsigned int bucket = 0, sequence; + struct nf_conn *ct; + + might_sleep(); + + for (;;) { + sequence = read_seqcount_begin(&nf_conntrack_generation); + + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + /* Time to push up daisies... */ + + nf_ct_delete(ct, portid, report); + nf_ct_put(ct); + cond_resched(); + } + + if (!read_seqcount_retry(&nf_conntrack_generation, sequence)) + break; + bucket = 0; + } +} + +struct iter_data { + int (*iter)(struct nf_conn *i, void *data); + void *data; + struct net *net; +}; + +static int iter_net_only(struct nf_conn *i, void *data) +{ + struct iter_data *d = data; + + if (!net_eq(d->net, nf_ct_net(i))) + return 0; + + return d->iter(i, d->data); +} + +static void +__nf_ct_unconfirmed_destroy(struct net *net) +{ + int cpu; + for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct ct_pcpu *pcpu; + + pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); spin_lock_bh(&pcpu->lock); hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { + struct nf_conn *ct; + ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - set_bit(IPS_DYING_BIT, &ct->status); + + /* we cannot call iter() on the unconfirmed list, the + * owning cpu can reallocate ct->ext at any time. 
+ */ + set_bit(IPS_DYING_BIT, &ct->status); } spin_unlock_bh(&pcpu->lock); cond_resched(); } - return NULL; -found: - atomic_inc(&ct->ct_general.use); - spin_unlock(lockp); - local_bh_enable(); - return ct; } -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report) +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report) { - struct nf_conn *ct; - unsigned int bucket = 0; + struct iter_data d; might_sleep(); if (atomic_read(&net->ct.count) == 0) return; - while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { - /* Time to push up daises... */ + __nf_ct_unconfirmed_destroy(net); - nf_ct_delete(ct, portid, report); - nf_ct_put(ct); - cond_resched(); + d.iter = iter; + d.data = data; + d.net = net; + + synchronize_net(); + + nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); +} +EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); + +/** + * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table + * @iter: callback to invoke for each conntrack + * @data: data to pass to @iter + * + * Like nf_ct_iterate_cleanup, but first marks conntracks on the + * unconfirmed list as dying (so they will not be inserted into + * the main table). + * + * Can only be called from the module exit path. + */ +void +nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) +{ + struct net *net; + + rtnl_lock(); + for_each_net(net) { + if (atomic_read(&net->ct.count) == 0) + continue; + __nf_ct_unconfirmed_destroy(net); } + rtnl_unlock(); + + /* Need to wait for netns cleanup worker to finish, if it's + * running -- it might have deleted a net namespace from + * the global list, so our __nf_ct_unconfirmed_destroy() might + * not have affected all namespaces. + */ + net_ns_barrier(); + + /* a conntrack could have been unlinked from the unconfirmed list + * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). + * This makes sure it is inserted into the conntrack table. 
+ */ + synchronize_net(); + + nf_ct_iterate_cleanup(iter, data, 0, 0); } -EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); +EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); static int kill_all(struct nf_conn *i, void *data) { - return 1; + return net_eq(nf_ct_net(i), data); } void nf_ct_free_hashtable(void *hash, unsigned int size) @@ -1723,7 +1820,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); + nf_ct_iterate_cleanup(kill_all, net, 0, 0); if (atomic_read(&net->ct.count) != 0) busy = 1; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3bcdc718484e..f71f0d2558fd 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1815,14 +1815,44 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { }, }; +static int __init h323_helper_init(void) +{ + int ret; + + ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); + if (ret < 0) + return ret; + ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); + if (ret < 0) + goto err1; + ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras, + ARRAY_SIZE(nf_conntrack_helper_ras)); + if (ret < 0) + goto err2; + + return 0; +err2: + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); +err1: + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + return ret; +} + +static void __exit h323_helper_exit(void) +{ + nf_conntrack_helpers_unregister(nf_conntrack_helper_ras, + ARRAY_SIZE(nf_conntrack_helper_ras)); + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); +} + /****************************************************************************/ static void __exit nf_conntrack_h323_fini(void) { - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + h323_helper_exit(); kfree(h323_buffer); pr_debug("nf_ct_h323: fini\n"); } @@ -1837,32 +1867,11 @@ static int __init nf_conntrack_h323_init(void) h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); + ret = h323_helper_init(); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); - if (ret < 0) - goto err2; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); - if (ret < 0) - goto err3; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); - if (ret < 0) - goto err4; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); - if (ret < 0) - goto err5; pr_debug("nf_ct_h323: init success\n"); return 0; - -err5: - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); -err4: - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); -err3: - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); -err2: - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); err1: kfree(h323_buffer); return ret; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 
7f6100ca63be..9129bb3b5153 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -285,16 +285,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ -static inline int unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static int unhelp(struct nf_conn *ct, void *me) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } + + /* We do not intend to delete this conntrack. */ return 0; } @@ -437,33 +437,10 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, - struct net *net) -{ - struct nf_conntrack_tuple_hash *h; - const struct hlist_nulls_node *nn; - int cpu; - - /* Get rid of expecteds, set helpers to NULL. */ - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - unhelp(h, me); - spin_unlock_bh(&pcpu->lock); - } -} - void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { - struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *next; - const struct hlist_nulls_node *nn; - unsigned int last_hsize; - spinlock_t *lock; - struct net *net; unsigned int i; mutex_lock(&nf_ct_helper_mutex); @@ -491,26 +468,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } spin_unlock_bh(&nf_conntrack_expect_lock); - rtnl_lock(); - for_each_net(net) - __nf_conntrack_helper_unregister(me, net); - rtnl_unlock(); - - local_bh_disable(); -restart: - last_hsize = nf_conntrack_htable_size; - for (i = 0; i < last_hsize; i++) { - lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; - nf_conntrack_lock(lock); - if (last_hsize != nf_conntrack_htable_size) { - spin_unlock(lock); - goto restart; - } - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - unhelp(h, me); - spin_unlock(lock); - } - local_bh_enable(); + nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8be9b72e6cd..7999e70c3bfb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -636,11 +636,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; - } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { + } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events) { + } else if (events) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -1122,8 +1122,8 @@ static int ctnetlink_flush_conntrack(struct net *net, return PTR_ERR(filter); } - nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, - portid, report); + nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, + portid, report); kfree(filter); return 0; @@ -1132,7 +1132,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct 
sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -1184,7 +1185,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -1345,7 +1347,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1367,7 +1370,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1906,7 +1910,8 @@ err1: static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2071,7 +2076,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2116,7 +2122,8 @@ nlmsg_failure: static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; int err; @@ -2778,7 +2785,8 @@ out: static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { int err; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2822,7 +2830,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -2834,7 +2843,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda); + return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda, + extack); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, @@ -2902,7 +2912,8 @@ out: static int 
ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -3190,7 +3201,8 @@ err_ct: static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -3296,7 +3308,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2de6c1fe3261..1dcad229c3cc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> -static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); @@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) { - if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL)) return &nf_conntrack_l4proto_generic; return rcu_dereference(nf_ct_protos[l3proto][l4proto]); @@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) int ret = 0; struct nf_conntrack_l3proto *old; - if (proto->l3proto >= AF_MAX) + if (proto->l3proto >= NFPROTO_NUMPROTO) return -EBUSY; if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) @@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { - BUG_ON(proto->l3proto >= AF_MAX); + BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); mutex_lock(&nf_ct_proto_mutex); BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], @@ -265,6 +265,8 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); + /* Remove all conntrack entries for this protocol */ + nf_ct_iterate_destroy(kill_l3proto, proto); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); @@ -280,9 +282,6 @@ void nf_ct_l3proto_pernet_unregister(struct net *net, */ if (proto->net_ns_put) proto->net_ns_put(net); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); @@ -342,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; - if (l4proto->l3proto >= PF_MAX) + if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)) return -EBUSY; if ((l4proto->to_nlattr && 
!l4proto->nlattr_size) || @@ -421,17 +420,23 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) + { - BUG_ON(l4proto->l3proto >= PF_MAX); + BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)); - mutex_lock(&nf_ct_proto_mutex); BUG_ON(rcu_dereference_protected( nf_ct_protos[l4proto->l3proto][l4proto->l4proto], lockdep_is_held(&nf_ct_proto_mutex) ) != l4proto); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], &nf_conntrack_l4proto_generic); +} + +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +{ + mutex_lock(&nf_ct_proto_mutex); + __nf_ct_l4proto_unregister_one(l4proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); @@ -448,9 +453,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, pn->users--; nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); @@ -500,8 +502,14 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], unsigned int num_proto) { + mutex_lock(&nf_ct_proto_mutex); while (num_proto-- != 0) - nf_ct_l4proto_unregister_one(l4proto[num_proto]); + __nf_ct_l4proto_unregister_one(l4proto[num_proto]); + mutex_unlock(&nf_ct_proto_mutex); + + synchronize_net(); + /* Remove all conntrack entries for this protocol */ + nf_ct_iterate_destroy(kill_l4proto, l4proto); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); @@ -548,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net) int nf_conntrack_proto_init(void) { unsigned int i; - for (i = 0; i < AF_MAX; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) rcu_assign_pointer(nf_ct_l3protos[i], &nf_conntrack_l3proto_generic); return 0; @@ -558,6 +566,6 @@ void nf_conntrack_proto_fini(void) { unsigned int i; /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) + for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1c5b14a6cab3..31c6c8ee9d5d 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) } #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ -for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \ +for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ (offset) < (skb)->len && \ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) @@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct, unsigned long *map) { u_int32_t offset, count; - sctp_chunkhdr_t _sch, *sch; + struct sctp_chunkhdr _sch, *sch; int flag; flag = 0; @@ -395,9 +395,9 @@ static int sctp_packet(struct nf_conn *ct, /* If it is an INIT or an INIT ACK note down the vtag */ if (sch->type == SCTP_CID_INIT || sch->type == SCTP_CID_INIT_ACK) { - sctp_inithdr_t _inithdr, *ih; + struct sctp_inithdr _inithdr, *ih; - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(_inithdr), &_inithdr); if (ih == NULL) goto out_unlock; @@ -471,23 +471,20 @@ static bool 
sctp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Copy the vtag into the state info */ if (sch->type == SCTP_CID_INIT) { - if (sh->vtag == 0) { - sctp_inithdr_t _inithdr, *ih; + struct sctp_inithdr _inithdr, *ih; + /* Sec 8.5.1 (A) */ + if (sh->vtag) + return false; - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) - return false; + ih = skb_header_pointer(skb, offset + sizeof(_sch), + sizeof(_inithdr), &_inithdr); + if (!ih) + return false; - pr_debug("Setting vtag %x for new conn\n", - ih->init_tag); + pr_debug("Setting vtag %x for new conn\n", + ih->init_tag); - ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = - ih->init_tag; - } else { - /* Sec 8.5.1 (A) */ - return false; - } + ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; } else if (sch->type == SCTP_CID_HEARTBEAT) { pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index c9d7f95768ab..f4a566e67213 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -13,6 +13,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_dup_netdev.h> static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6c72922d20ca..832c5a08d9a5 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -582,12 +582,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) .l3proto = l3proto, .l4proto = l4proto, }; - struct net *net; - rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); - rtnl_unlock(); + nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } static void nf_nat_l3proto_clean(u8 l3proto) @@ -595,13 +591,8 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct nf_nat_proto_clean clean = { .l3proto = l3proto, }; - struct net *net; - rtnl_lock(); - - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); - rtnl_unlock(); + nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } /* Protocol registration. 
*/ @@ -822,17 +813,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static void __net_exit nf_nat_net_exit(struct net *net) -{ - struct nf_nat_proto_clean clean = {}; - - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); -} - -static struct pernet_operations nf_nat_net_ops = { - .exit = nf_nat_net_exit, -}; - static struct nf_ct_helper_expectfn follow_master_nat = { .name = "nat-follow-master", .expectfn = nf_nat_follow_master, @@ -853,10 +833,6 @@ static int __init nf_nat_init(void) return ret; } - ret = register_pernet_subsys(&nf_nat_net_ops); - if (ret < 0) - goto cleanup_extend; - nf_ct_helper_expectfn_register(&follow_master_nat); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); @@ -867,18 +843,15 @@ static int __init nf_nat_init(void) RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); #endif return 0; - - cleanup_extend: - rhltable_destroy(&nf_nat_bysource_table); - nf_ct_extend_unregister(&nat_extend); - return ret; } static void __exit nf_nat_cleanup(void) { + struct nf_nat_proto_clean clean = {}; unsigned int i; - unregister_pernet_subsys(&nf_nat_net_ops); + nf_ct_iterate_destroy(nf_nat_proto_clean, &clean); + nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 804e8a0ab36e..c57ee3240b1d 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - sctp_sctphdr_t *hdr; + struct sctphdr *hdr; int hdrsize = 8; /* This could be an inner header returned in imcp packet; in such diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index a504e87c6ddf..49bd8bb16b18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, struct synproxy_options *opts) { opts->tsecr = opts->tsval; - opts->tsval = tcp_time_stamp & ~0x3f; + opts->tsval = tcp_time_stamp_raw() & ~0x3f; if (opts->options & XT_SYNPROXY_OPT_WSCALE) { opts->tsval |= opts->wscale; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da314be0c048..7843efa33c59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/vmalloc.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> @@ -386,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } -static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; +static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; static const struct nf_chain_type * __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) @@ -534,7 +535,8 @@ done: static int nf_tables_gettable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -677,7 +679,8 @@ err: static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct 
nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -830,7 +833,8 @@ out: static int nf_tables_deltable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -869,6 +873,9 @@ int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; + if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO)) + return -EINVAL; + nfnl_lock(NFNL_SUBSYS_NFTABLES); if (chain_type[ctype->family][ctype->type] != NULL) { err = -EBUSY; @@ -1123,7 +1130,8 @@ done: static int nf_tables_getchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -1319,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr * uninitialized_var(name); @@ -1557,7 +1566,8 @@ err1: static int nf_tables_delchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2038,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -2131,7 +2142,8 @@ static struct nft_expr_info *info; static int nf_tables_newrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2313,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, static int nf_tables_delrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2377,64 +2390,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, * Sets */ -static LIST_HEAD(nf_tables_set_ops); +static LIST_HEAD(nf_tables_set_types); -int nft_register_set(struct nft_set_ops *ops) +int nft_register_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&type->list, &nf_tables_set_types); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_set); 
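[Editor's aside — illustrative sketch, not part of this commit: the hunk above moves set-backend registration from a bare struct nft_set_ops to a struct nft_set_type that owns its ops and module reference. Reading the field accesses in the surrounding hunks (type->list, type->owner, type->ops, type->select_ops, ops->type, ops->features), a backend with one fixed implementation would now register roughly as follows; all nft_example_* names are invented, and the real lookup/insert/remove/walk/estimate callbacks are elided:]

static struct nft_set_type nft_example_type;

static const struct nft_set_ops nft_example_ops = {
	/* ->lookup/->insert/->remove/->walk/->estimate elided in this sketch */
	.features	= NFT_SET_MAP,
	.type		= &nft_example_type,	/* ops now point back at their owning type */
};

static struct nft_set_type nft_example_type __read_mostly = {
	.ops	= &nft_example_ops,	/* fixed ops for this sketch */
	.owner	= THIS_MODULE,
};

static int __init nft_example_module_init(void)
{
	/* before this commit: nft_register_set(&nft_example_ops) */
	return nft_register_set(&nft_example_type);
}

[A backend offering several implementations would instead set ->select_ops(), which nft_select_set_ops() below now invokes with the nft_ctx, the set description and the NFTA_SET_FLAGS value, picking the ops at set-creation time.]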
-void nft_unregister_set(struct nft_set_ops *ops) +void nft_unregister_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del_rcu(&ops->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); +#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ + NFT_SET_TIMEOUT | NFT_SET_OBJECT) + +static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) +{ + return (flags & ops->features) == (flags & NFT_SET_FEATURES); +} + /* * Select a set implementation based on the data characteristics and the * given policy. The total memory use might not be known if no size is * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * -nft_select_set_ops(const struct nlattr * const nla[], +nft_select_set_ops(const struct nft_ctx *ctx, + const struct nlattr * const nla[], const struct nft_set_desc *desc, enum nft_set_policies policy) { const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; - u32 features; + const struct nft_set_type *type; + u32 flags = 0; #ifdef CONFIG_MODULES - if (list_empty(&nf_tables_set_ops)) { + if (list_empty(&nf_tables_set_types)) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-set"); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (!list_empty(&nf_tables_set_ops)) + if (!list_empty(&nf_tables_set_types)) return ERR_PTR(-EAGAIN); } #endif - features = 0; - if (nla[NFTA_SET_FLAGS] != NULL) { - features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT | - NFT_SET_OBJECT; - } + if (nla[NFTA_SET_FLAGS] != NULL) + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); bops = NULL; best.size = ~0; best.lookup = ~0; best.space = ~0; - list_for_each_entry(ops, &nf_tables_set_ops, list) { - if ((ops->features & features) != features) + list_for_each_entry(type, &nf_tables_set_types, list) { + if (!type->select_ops) + ops = type->ops; + else + ops = type->select_ops(ctx, desc, flags); + if (!ops) + continue; + + if (!nft_set_ops_candidate(ops, flags)) continue; - if (!ops->estimate(desc, features, &est)) + if (!ops->estimate(desc, flags, &est)) continue; switch (policy) { @@ -2465,10 +2491,10 @@ nft_select_set_ops(const struct nlattr * const nla[], break; } - if (!try_module_get(ops->owner)) + if (!try_module_get(type->owner)) continue; if (bops != NULL) - module_put(bops->owner); + module_put(bops->type->owner); bops = ops; best = est; @@ -2816,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_cur(net); const struct nft_set *set; @@ -2892,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, static int nf_tables_newset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -3029,7 +3057,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -3039,12 +3067,13 @@ 
static int nf_tables_newset(struct net *net, struct sock *nlsk, size = 0; if (ops->privsize != NULL) - size = ops->privsize(nla); + size = ops->privsize(nla, &desc); - err = -ENOMEM; - set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); - if (set == NULL) + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); + if (!set) { + err = -ENOMEM; goto err1; + } nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); err = nf_tables_set_alloc_name(&ctx, set, name); @@ -3087,17 +3116,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err3: ops->destroy(set); err2: - kfree(set); + kvfree(set); err1: - module_put(ops->owner); + module_put(ops->type->owner); return err; } static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); - module_put(set->ops->owner); - kfree(set); + module_put(set->ops->type->owner); + kvfree(set); } static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) @@ -3109,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set static int nf_tables_delset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -3469,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb) static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_cur(net); const struct nft_set *set; @@ -3870,7 +3901,8 @@ err1: static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_next(net); const struct nlattr *attr; @@ -4067,7 +4099,8 @@ err1: static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_next(net); const struct nlattr *attr; @@ -4277,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype) static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_object_type *type; @@ -4471,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) static int nf_tables_getobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -4549,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj) } static int nf_tables_delobj(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 
u8 genmask = nft_genmask_next(net); @@ -4680,7 +4716,8 @@ err: static int nf_tables_getgen(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; int err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 80f5ecf2c3d7..92b05e188fd1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -201,7 +201,8 @@ replay: if (nc->call_rcu) { err = nc->call_rcu(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + extack); rcu_read_unlock(); } else { rcu_read_unlock(); @@ -211,7 +212,8 @@ replay: err = -EAGAIN; else if (nc->call) err = nc->call(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + extack); else err = -EINVAL; nfnl_unlock(subsys_id); @@ -226,9 +228,11 @@ struct nfnl_err { struct list_head head; struct nlmsghdr *nlh; int err; + struct netlink_ext_ack extack; }; -static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) { struct nfnl_err *nfnl_err; @@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) nfnl_err->nlh = nlh; nfnl_err->err = err; + nfnl_err->extack = *extack; list_add_tail(&nfnl_err->head, list); return 0; @@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { - netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL); + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, + &nfnl_err->extack); nfnl_err_del(nfnl_err); } } @@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; + struct netlink_ext_ack extack; LIST_HEAD(err_list); u32 status; int err; @@ -325,6 +332,7 @@ replay: while (skb->len >= nlmsg_total_size(0)) { int msglen, type; + memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; @@ -384,7 +392,8 @@ replay: if (nc->call_batch) { err = nc->call_batch(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + &extack); } /* The lock was released to autoload some module, we @@ -402,7 +411,7 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err) < 0) { + if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. 
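[Editor's aside — illustrative sketch, not part of this commit: the nfnetlink.c batch path above zeroes a struct netlink_ext_ack per message and replays it through nfnl_err_add()/netlink_ack(), so every nfnl_callback now receives an extack it can fill. A hypothetical subsystem command (all nfnl_example_* names, including the attribute index, are invented for the sketch) could report a precise parse error like this:]

#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

/* invented attribute index, for the sketch only */
#define NFNL_EXAMPLE_ATTR_NAME 1

static int nfnl_example_cmd(struct net *net, struct sock *nfnl,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
			    const struct nlattr * const tb[],
			    struct netlink_ext_ack *extack)
{
	if (!tb[NFNL_EXAMPLE_ATTR_NAME]) {
		/* travels back to userspace as NLMSGERR_ATTR_MSG in the ACK */
		NL_SET_ERR_MSG(extack, "name attribute is missing");
		return -EINVAL;
	}
	return 0;
}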
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 9898fb4d0512..c45e6d4358ab 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,7 +49,8 @@ struct nfacct_filter { static int nfnl_acct_new(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; @@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr) static int nfnl_acct_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; struct nf_acct *cur; @@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur) static int nfnl_acct_del(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { struct nf_acct *cur, *tmp; int ret = -ENOENT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index be678a323598..41628b393673 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[], static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; @@ -599,7 +600,8 @@ out: static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; struct nf_conntrack_helper *cur; @@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { char *helper_name = NULL; struct nf_conntrack_helper *cur; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index a3e7bb54d96a..400e9ae97153 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; @@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; char *name; @@ -287,49 +289,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, return ret; } -static void untimeout(struct nf_conntrack_tuple_hash *i, - struct ctnl_timeout *timeout) +static int untimeout(struct 
nf_conn *ct, void *timeout) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) RCU_INIT_POINTER(timeout_ext->timeout, NULL); + + /* We are not intended to delete this conntrack. */ + return 0; } static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { - struct nf_conntrack_tuple_hash *h; - const struct hlist_nulls_node *nn; - unsigned int last_hsize; - spinlock_t *lock; - int i, cpu; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - untimeout(h, timeout); - spin_unlock_bh(&pcpu->lock); - } - - local_bh_disable(); -restart: - last_hsize = nf_conntrack_htable_size; - for (i = 0; i < last_hsize; i++) { - lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; - nf_conntrack_lock(lock); - if (last_hsize != nf_conntrack_htable_size) { - spin_unlock(lock); - goto restart; - } - - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - untimeout(h, timeout); - spin_unlock(lock); - } - local_bh_enable(); + nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); } /* try to delete object, fail if it is still in use. */ @@ -355,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; @@ -386,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, static int cttimeout_default_set(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; @@ -475,7 +450,8 @@ nla_put_failure: static int cttimeout_default_get(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index da9704971a83..c684ba95dbb4 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -590,7 +590,7 @@ __build_packet_message(struct nfnl_log_net *log, if (skb_tailroom(inst->skb) < nla_total_size(data_len)) goto nla_put_failure; - nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len)); + nla = skb_put(inst->skb, nla_total_size(data_len)); nla->nla_type = NFULA_PAYLOAD; nla->nla_len = size; @@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { return -ENOTSUPP; } @@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { static int nfulnl_recv_config(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfula[]) + const struct nlattr * const nfula[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t group_num = 
ntohs(nfmsg->res_id); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8a0f218b7938..16fa04086880 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -589,7 +589,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (skb_tailroom(skb) < sizeof(*nla) + hlen) goto nla_put_failure; - nla = (struct nlattr *)skb_put(skb, sizeof(*nla)); + nla = skb_put(skb, sizeof(*nla)); nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); @@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max) static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_queue_entry *entry, *tmp; @@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { return -ENOTSUPP; } @@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f753ec69f790..f5a7cb68694e 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -530,7 +530,8 @@ nla_put_failure: static int nfnl_compat_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = 0, target; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index d3eb640bc784..c7383d8f88d0 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -23,9 +23,9 @@ struct nft_rt { enum nft_registers dreg:8; }; -void nft_rt_get_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_rt *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -72,9 +72,9 @@ const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { [NFTA_RT_KEY] = { .type = NLA_U32 }, }; -int nft_rt_get_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_rt *priv = nft_expr_priv(expr); unsigned int len; @@ -103,8 +103,8 @@ int nft_rt_get_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, 
len); } -int nft_rt_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +static int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_rt *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b988162b5b15..734989c40579 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen) return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } -static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[]) +static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); @@ -278,7 +279,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_bitmap_type; static struct nft_set_ops nft_bitmap_ops __read_mostly = { + .type = &nft_bitmap_type, .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, @@ -291,17 +294,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, +}; + +static struct nft_set_type nft_bitmap_type __read_mostly = { + .ops = &nft_bitmap_ops, .owner = THIS_MODULE, }; static int __init nft_bitmap_module_init(void) { - return nft_register_set(&nft_bitmap_ops); + return nft_register_set(&nft_bitmap_type); } static void __exit nft_bitmap_module_exit(void) { - nft_unregister_set(&nft_bitmap_ops); + nft_unregister_set(&nft_bitmap_type); } module_init(nft_bitmap_module_init); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3d3a6df4ce70..0fa01d772c5e 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -22,45 +22,43 @@ #include <net/netfilter/nf_tables.h> /* We target a hash table size of 4, element hint is 75% of final size */ -#define NFT_HASH_ELEMENT_HINT 3 +#define NFT_RHASH_ELEMENT_HINT 3 -struct nft_hash { +struct nft_rhash { struct rhashtable ht; struct delayed_work gc_work; }; -struct nft_hash_elem { +struct nft_rhash_elem { struct rhash_head node; struct nft_set_ext ext; }; -struct nft_hash_cmp_arg { +struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; }; -static const struct rhashtable_params nft_hash_params; - -static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) { - const struct nft_hash_cmp_arg *arg = data; + const struct nft_rhash_cmp_arg *arg = data; return jhash(arg->key, len, seed); } -static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) +static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed) { - const struct nft_hash_elem *he = data; + const struct nft_rhash_elem *he = data; return jhash(nft_set_ext_key(&he->ext), len, seed); } -static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, - const void *ptr) +static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - const struct nft_hash_cmp_arg *x = arg->key; - const struct nft_hash_elem *he = ptr; + const struct nft_rhash_cmp_arg *x = arg->key; + const struct nft_rhash_elem *he = ptr; if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; @@ -71,41 +69,49 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, return 0; } 
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +static const struct rhashtable_params nft_rhash_params = { + .head_offset = offsetof(struct nft_rhash_elem, node), + .hashfn = nft_rhash_key, + .obj_hashfn = nft_rhash_obj, + .obj_cmpfn = nft_rhash_cmp, + .automatic_shrinking = true, +}; + +static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + const struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_cur(net), .set = set, .key = key, }; - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL) *ext = &he->ext; return !!he; } -static bool nft_hash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *regs), - const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext) +static bool nft_rhash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he, *prev; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he, *prev; + struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, .key = key, }; - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL) goto out; @@ -114,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, goto err1; prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + nft_rhash_params); if (IS_ERR(prev)) goto err2; @@ -134,21 +140,21 @@ err1: return false; } -static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, - struct nft_set_ext **ext) +static int nft_rhash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he = elem->priv; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; - struct nft_hash_elem *prev; + struct nft_rhash_elem *prev; prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + nft_rhash_params); if (IS_ERR(prev)) return PTR_ERR(prev); if (prev) { @@ -158,19 +164,19 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, return 0; } -static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_rhash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_rhash_elem *he = elem->priv; nft_set_elem_change_active(net, set, &he->ext); nft_set_elem_clear_busy(&he->ext); } -static bool nft_hash_flush(const 
struct net *net, - const struct nft_set *set, void *priv) +static bool nft_rhash_flush(const struct net *net, + const struct nft_set *set, void *priv) { - struct nft_hash_elem *he = priv; + struct nft_rhash_elem *he = priv; if (!nft_set_elem_mark_busy(&he->ext) || !nft_is_active(net, &he->ext)) { @@ -180,22 +186,22 @@ static bool nft_hash_flush(const struct net *net, return false; } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void *nft_rhash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; rcu_read_lock(); - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL && - !nft_hash_flush(net, set, he)) + !nft_rhash_flush(net, set, he)) he = NULL; rcu_read_unlock(); @@ -203,21 +209,21 @@ static void *nft_hash_deactivate(const struct net *net, return he; } -static void nft_hash_remove(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_rhash_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he = elem->priv; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } -static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_iter *iter) +static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; int err; @@ -266,16 +272,16 @@ out: rhashtable_walk_exit(&hti); } -static void nft_hash_gc(struct work_struct *work) +static void nft_rhash_gc(struct work_struct *work) { struct nft_set *set; - struct nft_hash_elem *he; - struct nft_hash *priv; + struct nft_rhash_elem *he; + struct nft_rhash *priv; struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; int err; - priv = container_of(work, struct nft_hash, gc_work.work); + priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -301,7 +307,7 @@ static void nft_hash_gc(struct work_struct *work) gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (gcb == NULL) goto out; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, he); } @@ -315,82 +321,290 @@ schedule: nft_set_gc_interval(set)); } -static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { - return sizeof(struct nft_hash); + return sizeof(struct nft_rhash); } -static const struct rhashtable_params nft_hash_params = { - .head_offset = 
offsetof(struct nft_hash_elem, node), - .hashfn = nft_hash_key, - .obj_hashfn = nft_hash_obj, - .obj_cmpfn = nft_hash_cmp, - .automatic_shrinking = true, -}; - -static int nft_hash_init(const struct nft_set *set, - const struct nft_set_desc *desc, - const struct nlattr * const tb[]) +static int nft_rhash_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct rhashtable_params params = nft_hash_params; + struct nft_rhash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_rhash_params; int err; - params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; err = rhashtable_init(&priv->ht, ¶ms); if (err < 0) return err; - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); if (set->flags & NFT_SET_TIMEOUT) queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); return 0; } -static void nft_hash_elem_destroy(void *ptr, void *arg) +static void nft_rhash_elem_destroy(void *ptr, void *arg) { nft_set_elem_destroy(arg, ptr, true); } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_set *set) { - struct nft_hash *priv = nft_set_priv(set); + struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); - rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } -static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, - struct nft_set_estimate *est) +static u32 nft_hash_buckets(u32 size) { - unsigned int esize; + return roundup_pow_of_two(size * 4 / 3); +} - esize = sizeof(struct nft_hash_elem); - if (desc->size) { - est->size = sizeof(struct nft_hash) + - roundup_pow_of_two(desc->size * 4 / 3) * - sizeof(struct nft_hash_elem *) + - desc->size * esize; - } else { - /* Resizing happens when the load drops below 30% or goes - * above 75%. The average of 52.5% load (approximated by 50%) - * is used for the size estimation of the hash buckets, - * meaning we calculate two buckets per element. - */ - est->size = esize + 2 * sizeof(struct nft_hash_elem *); +static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + est->size = ~0; + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_N; + + return true; +} + +struct nft_hash { + u32 seed; + u32 buckets; + struct hlist_head table[]; +}; + +struct nft_hash_elem { + struct hlist_node node; + struct nft_set_ext ext; +}; + +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_hash_elem *he; + u32 hash; + + hash = jhash(key, set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return true; + } + } + return false; +} + +/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . 
*/ +static inline u32 nft_hash_key(const u32 *key, u32 klen) +{ + if (klen == 4) + return *key; + + return *(u16 *)key; +} + +static bool nft_hash_lookup_fast(const struct net *net, + const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_hash_elem *he; + u32 hash, k1, k2; + + k1 = nft_hash_key(key, set->klen); + hash = jhash_1word(k1, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen); + if (k1 == k2 && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return true; + } + } + return false; +} + +static int nft_hash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) +{ + struct nft_hash_elem *this = elem->priv, *he; + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + u32 hash; + + hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&this->ext), + nft_set_ext_key(&he->ext), set->klen) && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return -EEXIST; + } + } + hlist_add_head_rcu(&this->node, &priv->table[hash]); + return 0; +} + +static void nft_hash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + nft_set_elem_change_active(net, set, &he->ext); +} + +static bool nft_hash_flush(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_hash_elem *he = priv; + + nft_set_elem_change_active(net, set, &he->ext); + return true; +} + +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *this = elem->priv, *he; + u8 genmask = nft_genmask_next(net); + u32 hash; + + hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, + set->klen) || + nft_set_elem_active(&he->ext, genmask)) { + nft_set_elem_change_active(net, set, &he->ext); + return he; + } } + return NULL; +} + +static void nft_hash_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + hlist_del_rcu(&he->node); +} + +static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_set_elem elem; + int i; + + for (i = 0; i < priv->buckets; i++) { + hlist_for_each_entry_rcu(he, &priv->table[i], node) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&he->ext, iter->genmask)) + goto cont; + + elem.priv = he; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } + } +} + +static unsigned int nft_hash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) +{ + return sizeof(struct nft_hash) + + nft_hash_buckets(desc->size) * sizeof(struct hlist_head); +} +static int nft_hash_init(const struct 
nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_set_priv(set); + + priv->buckets = nft_hash_buckets(desc->size); + get_random_bytes(&priv->seed, sizeof(priv->seed)); + + return 0; +} + +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct hlist_node *next; + int i; + + for (i = 0; i < priv->buckets; i++) { + hlist_for_each_entry_safe(he, next, &priv->table[i], node) { + hlist_del_rcu(&he->node); + nft_set_elem_destroy(set, he, true); + } + } +} + +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + est->size = sizeof(struct nft_hash) + + nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + + desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N; return true; } +static struct nft_set_type nft_hash_type; +static struct nft_set_ops nft_rhash_ops __read_mostly = { + .type = &nft_hash_type, + .privsize = nft_rhash_privsize, + .elemsize = offsetof(struct nft_rhash_elem, ext), + .estimate = nft_rhash_estimate, + .init = nft_rhash_init, + .destroy = nft_rhash_destroy, + .insert = nft_rhash_insert, + .activate = nft_rhash_activate, + .deactivate = nft_rhash_deactivate, + .flush = nft_rhash_flush, + .remove = nft_rhash_remove, + .lookup = nft_rhash_lookup, + .update = nft_rhash_update, + .walk = nft_rhash_walk, + .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, +}; + static struct nft_set_ops nft_hash_ops __read_mostly = { + .type = &nft_hash_type, .privsize = nft_hash_privsize, .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, @@ -402,20 +616,57 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, - .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, + .features = NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static struct nft_set_ops nft_hash_fast_ops __read_mostly = { + .type = &nft_hash_type, + .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), + .estimate = nft_hash_estimate, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, + .flush = nft_hash_flush, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup_fast, + .walk = nft_hash_walk, + .features = NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static const struct nft_set_ops * +nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, + u32 flags) +{ + if (desc->size) { + switch (desc->klen) { + case 2: + case 4: + return &nft_hash_fast_ops; + default: + return &nft_hash_ops; + } + } + + return &nft_rhash_ops; +} + +static struct nft_set_type nft_hash_type __read_mostly = { + .select_ops = nft_hash_select_ops, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_set(&nft_hash_ops); + return nft_register_set(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { - nft_unregister_set(&nft_hash_ops); + nft_unregister_set(&nft_hash_type); } module_init(nft_hash_module_init); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fbdbaa00dd5f..bce5382f1d49 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -251,7 +251,8 @@ cont: 
read_unlock_bh(&priv->lock); } -static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } @@ -283,13 +284,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { - unsigned int nsize; - - nsize = sizeof(struct nft_rbtree_elem); if (desc->size) - est->size = sizeof(struct nft_rbtree) + desc->size * nsize; + est->size = sizeof(struct nft_rbtree) + + desc->size * sizeof(struct nft_rbtree_elem); else - est->size = nsize; + est->size = ~0; est->lookup = NFT_SET_CLASS_O_LOG_N; est->space = NFT_SET_CLASS_O_N; @@ -297,7 +296,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_rbtree_type; static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .type = &nft_rbtree_type, .privsize = nft_rbtree_privsize, .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, @@ -311,17 +312,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static struct nft_set_type nft_rbtree_type __read_mostly = { + .ops = &nft_rbtree_ops, .owner = THIS_MODULE, }; static int __init nft_rbtree_module_init(void) { - return nft_register_set(&nft_rbtree_ops); + return nft_register_set(&nft_rbtree_type); } static void __exit nft_rbtree_module_exit(void) { - nft_unregister_set(&nft_rbtree_ops); + nft_unregister_set(&nft_rbtree_type); } module_init(nft_rbtree_module_init); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index df7f1df00330..d767e35fff6b 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, daddr, dport, in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in @@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, daddr, ntohs(dport), in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c05fefcec238..71cfa9551d08 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { static int xt_osf_add_callback(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const osf_attrs[]) + const struct nlattr * const osf_attrs[], + struct netlink_ext_ack *extack) { struct xt_osf_user_finger *f; struct xt_osf_finger *kf = NULL, *sf; @@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const osf_attrs[]) + const struct nlattr * const osf_attrs[], + struct netlink_ext_ack *extack) { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; diff --git 
a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 4dedb96d1a06..2d2fa1d53ea6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb, bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; - const sctp_chunkhdr_t *sch; - sctp_chunkhdr_t _sch; + const struct sctp_chunkhdr *sch; + struct sctp_chunkhdr _sch; int chunk_match_type = info->chunk_match_type; const struct xt_sctp_flag_info *flag_info = info->flag_info; int flag_count = info->flag_count; @@ -118,8 +118,8 @@ static bool sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_sctp_info *info = par->matchinfo; - const sctp_sctphdr_t *sh; - sctp_sctphdr_t _sh; + const struct sctphdr *sh; + struct sctphdr _sh; if (par->fragoff != 0) { pr_debug("Dropping non-first fragment.. FIXME\n"); @@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) return SCCHECK(ntohs(sh->source) >= info->spts[0] && ntohs(sh->source) <= info->spts[1], - XT_SCTP_SRC_PORTS, info->flags, info->invflags) - && SCCHECK(ntohs(sh->dest) >= info->dpts[0] + XT_SCTP_SRC_PORTS, info->flags, info->invflags) && + SCCHECK(ntohs(sh->dest) >= info->dpts[0] && ntohs(sh->dest) <= info->dpts[1], - XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), - info, &par->hotdrop), - XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); + XT_SCTP_DEST_PORTS, info->flags, info->invflags) && + SCCHECK(match_packet(skb, par->thoff + sizeof(_sh), + info, &par->hotdrop), + XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } static int sctp_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7586d446d7dc..5acee49db90b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -170,7 +170,7 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; NETLINK_CB(new).creds = NETLINK_CB(skb).creds; - memcpy(skb_put(new, len), skb->data, len); + skb_put_data(new, skb->data, len); return new; } @@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(nlk_sk(sk)->groups); } @@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk) table = &nl_table[sk->sk_protocol]; if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, netlink_rhashtable_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } @@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head) struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); struct sock *sk = &nlk->sk; - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (nlk->cb_running && nlk->cb.done) { @@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } if (dst_group) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); @@ -2104,7 +2104,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla struct nlmsghdr *nlh; int size = nlmsg_msg_size(len); - nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size)); + nlh = 
skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; @@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - atomic_inc(&skb->users); + refcount_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, int exclude_portid = 0; if (report) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); exclude_portid = portid; } @@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), nlk->cb_running, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), atomic_read(&s->sk_drops), sock_i_ino(s) ); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index d72a4f1558f2..0c59354e280e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -149,7 +149,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; nr_neigh->failed = 0; - atomic_set(&nr_neigh->refcount, 1); + refcount_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { nr_neigh->digipeat = kmemdup(ax25_digi, @@ -184,7 +184,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_node->which = 0; nr_node->count = 1; - atomic_set(&nr_node->refcount, 1); + refcount_set(&nr_node->refcount, 1); spin_lock_init(&nr_node->node_lock); nr_node->routes[0].quality = quality; @@ -431,7 +431,7 @@ static int __must_check nr_add_neigh(ax25_address *callsign, nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; nr_neigh->failed = 0; - atomic_set(&nr_neigh->refcount, 1); + refcount_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi), diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 54e40fa47822..d3e594eb36d0 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -48,7 +48,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, return rc; } -static struct net_proto_family nfc_sock_family_ops = { +static const struct net_proto_family nfc_sock_family_ops = { .owner = THIS_MODULE, .family = PF_NFC, .create = nfc_sock_create, diff --git a/net/nfc/core.c b/net/nfc/core.c index 122bb81da918..5cf33df888c3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -982,6 +982,8 @@ static void nfc_release(struct device *d) kfree(se); } + ida_simple_remove(&nfc_index_ida, dev->idx); + kfree(dev); } @@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; + int rc; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || !ops->deactivate_target || !ops->im_transceive) @@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, if (!dev) return NULL; + rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + if (rc < 0) + goto err_free_dev; + dev->idx = rc; + + dev->dev.class = &nfc_class; + dev_set_name(&dev->dev, "nfc%d", dev->idx); + device_initialize(&dev->dev); + dev->ops = ops; dev->supported_protocols = supported_protocols; dev->tx_headroom = tx_headroom; @@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, } return dev; + +err_free_dev: + kfree(dev); + + return ERR_PTR(rc); } EXPORT_SYMBOL(nfc_allocate_device); @@ -1104,14 +1121,6 @@ int 
nfc_register_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); - if (dev->idx < 0) - return dev->idx; - - dev->dev.class = &nfc_class; - dev_set_name(&dev->dev, "nfc%d", dev->idx); - device_initialize(&dev->dev); - mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; rc = device_add(&dev->dev); @@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device); */ void nfc_unregister_device(struct nfc_dev *dev) { - int rc, id; + int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - id = dev->idx; - if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); @@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev) nfc_devlist_generation++; device_del(&dev->dev); mutex_unlock(&nfc_devlist_mutex); - - ida_simple_remove(&nfc_index_ida, id); } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 0fd5518bf252..de6dd37d04c7 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -74,8 +74,8 @@ void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init, if (msb_first) crc = __fswab16(crc); - *skb_put(skb, 1) = crc & 0xFF; - *skb_put(skb, 1) = (crc >> 8) & 0xFF; + skb_put_u8(skb, crc & 0xFF); + skb_put_u8(skb, (crc >> 8) & 0xFF); } int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func, @@ -240,7 +240,7 @@ int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type, { struct digital_cmd *cmd; - cmd = kzalloc(sizeof(struct digital_cmd), GFP_KERNEL); + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -287,7 +287,7 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; - params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL); + params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; @@ -706,11 +706,9 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, struct digital_data_exch *data_exch; int rc; - data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL); - if (!data_exch) { - pr_err("Failed to allocate data_exch struct\n"); + data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL); + if (!data_exch) return -ENOMEM; - } data_exch->cb = cb; data_exch->cb_context = cb_context; @@ -764,7 +762,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; - ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL); + ddev = kzalloc(sizeof(*ddev), GFP_KERNEL); if (!ddev) return NULL; diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index f864ce19e13d..4f9a973988b2 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -151,7 +151,7 @@ static const u8 digital_payload_bits_map[4] = { * 0 <= wt <= 14 (given by the target by the TO field of ATR_RES response) */ #define DIGITAL_NFC_DEP_IN_MAX_WT 14 -#define DIGITAL_NFC_DEP_TG_MAX_WT 8 +#define DIGITAL_NFC_DEP_TG_MAX_WT 14 static const u16 digital_rwt_map[DIGITAL_NFC_DEP_IN_MAX_WT + 1] = { 100, 101, 101, 102, 105, 110, 119, 139, 177, 255, @@ -185,7 +185,7 @@ static void digital_skb_push_dep_sod(struct nfc_digital_dev *ddev, skb->data[0] = skb->len; if (ddev->curr_rf_tech == NFC_DIGITAL_RF_TECH_106A) - *skb_push(skb, sizeof(u8)) = DIGITAL_NFC_DEP_NFCA_SOD_SB; + *(u8 *)skb_push(skb, sizeof(u8)) = DIGITAL_NFC_DEP_NFCA_SOD_SB; } static int digital_skb_pull_dep_sod(struct 
nfc_digital_dev *ddev, @@ -226,8 +226,7 @@ digital_send_dep_data_prep(struct nfc_digital_dev *ddev, struct sk_buff *skb, return ERR_PTR(-ENOMEM); } - memcpy(skb_put(new_skb, ddev->remote_payload_max), skb->data, - ddev->remote_payload_max); + skb_put_data(new_skb, skb->data, ddev->remote_payload_max); skb_pull(skb, ddev->remote_payload_max); ddev->chaining_skb = skb; @@ -277,8 +276,7 @@ digital_recv_dep_data_gather(struct nfc_digital_dev *ddev, u8 pfb, ddev->chaining_skb = new_skb; } - memcpy(skb_put(ddev->chaining_skb, resp->len), resp->data, - resp->len); + skb_put_data(ddev->chaining_skb, resp->data, resp->len); kfree_skb(resp); resp = NULL; @@ -525,7 +523,7 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev, if (gb_len) { atr_req->pp |= DIGITAL_GB_BIT; - memcpy(skb_put(skb, gb_len), gb, gb_len); + skb_put_data(skb, gb, gb_len); } digital_skb_push_dep_sod(ddev, skb); @@ -656,7 +654,7 @@ static int digital_in_send_rtox(struct nfc_digital_dev *ddev, if (!skb) return -ENOMEM; - *skb_put(skb, 1) = rtox; + skb_put_u8(skb, rtox); skb_push(skb, sizeof(struct digital_dep_req_res)); @@ -1012,8 +1010,7 @@ static int digital_tg_send_ack(struct nfc_digital_dev *ddev, if (ddev->did) { dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT; - memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did, - sizeof(ddev->did)); + skb_put_data(skb, &ddev->did, sizeof(ddev->did)); } ddev->curr_nfc_dep_pni = @@ -1057,8 +1054,7 @@ static int digital_tg_send_atn(struct nfc_digital_dev *ddev) if (ddev->did) { dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT; - memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did, - sizeof(ddev->did)); + skb_put_data(skb, &ddev->did, sizeof(ddev->did)); } digital_skb_push_dep_sod(ddev, skb); @@ -1325,8 +1321,7 @@ int digital_tg_send_dep_res(struct nfc_digital_dev *ddev, struct sk_buff *skb) if (ddev->did) { dep_res->pfb |= DIGITAL_NFC_DEP_PFB_DID_BIT; - memcpy(skb_put(skb, sizeof(ddev->did)), &ddev->did, - sizeof(ddev->did)); + skb_put_data(skb, &ddev->did, sizeof(ddev->did)); } ddev->curr_nfc_dep_pni = diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index d9080dec5d27..2021d1d58a75 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -27,6 +27,7 @@ #define DIGITAL_SDD_RES_CT 0x88 #define DIGITAL_SDD_RES_LEN 5 +#define DIGITAL_SEL_RES_LEN 1 #define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04)) #define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60)) @@ -266,8 +267,8 @@ static int digital_in_send_rats(struct nfc_digital_dev *ddev, if (!skb) return -ENOMEM; - *skb_put(skb, 1) = DIGITAL_RATS_BYTE1; - *skb_put(skb, 1) = DIGITAL_RATS_PARAM; + skb_put_u8(skb, DIGITAL_RATS_BYTE1); + skb_put_u8(skb, DIGITAL_RATS_PARAM); rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_ats, target); @@ -299,7 +300,7 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, } } - if (!resp->len) { + if (resp->len != DIGITAL_SEL_RES_LEN) { rc = -EIO; goto exit; } @@ -470,8 +471,8 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev, else sel_cmd = DIGITAL_CMD_SEL_REQ_CL3; - *skb_put(skb, sizeof(u8)) = sel_cmd; - *skb_put(skb, sizeof(u8)) = DIGITAL_SDD_REQ_SEL_PAR; + skb_put_u8(skb, sel_cmd); + skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR); return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, target); @@ -541,7 +542,7 @@ int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech) if (!skb) return -ENOMEM; - *skb_put(skb, sizeof(u8)) = DIGITAL_CMD_SENS_REQ; + skb_put_u8(skb, 
DIGITAL_CMD_SENS_REQ); rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sens_res, NULL); if (rc) @@ -625,8 +626,7 @@ static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev, if (!skb) return -ENOMEM; - attrib_req = (struct digital_attrib_req *)skb_put(skb, - sizeof(*attrib_req)); + attrib_req = skb_put(skb, sizeof(*attrib_req)); attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ; memcpy(attrib_req->nfcid0, sensb_res->nfcid0, @@ -730,8 +730,7 @@ int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech) if (!skb) return -ENOMEM; - sensb_req = (struct digital_sensb_req *)skb_put(skb, - sizeof(*sensb_req)); + sensb_req = skb_put(skb, sizeof(*sensb_req)); sensb_req->cmd = DIGITAL_CMD_SENSB_REQ; sensb_req->afi = 0x00; /* All families and sub-families */ @@ -830,7 +829,7 @@ int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech) sensf_req->rc = 0; sensf_req->tsn = 0; - *skb_push(skb, 1) = size + 1; + *(u8 *)skb_push(skb, 1) = size + 1; if (!DIGITAL_DRV_CAPS_IN_CRC(ddev)) digital_skb_add_crc_f(skb); @@ -939,7 +938,7 @@ static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev) if (!skb) return -ENOMEM; - *skb_put(skb, 1) = DIGITAL_SEL_RES_NFC_DEP; + skb_put_u8(skb, DIGITAL_SEL_RES_NFC_DEP); if (!DIGITAL_DRV_CAPS_TG_CRC(ddev)) digital_skb_add_crc_a(skb); @@ -1163,7 +1162,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, break; } - *skb_push(skb, sizeof(u8)) = size + 1; + *(u8 *)skb_push(skb, sizeof(u8)) = size + 1; if (!DIGITAL_DRV_CAPS_TG_CRC(ddev)) digital_skb_add_crc_f(skb); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 2b0f0ac498d2..b740fef0acc5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -727,7 +727,7 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, break; } - *skb_push(skb, 1) = 0; /* CTR, see spec:10.2.2.1 */ + *(u8 *)skb_push(skb, 1) = 0; /* CTR, see spec:10.2.2.1 */ hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE; hdev->async_cb = cb; @@ -874,13 +874,13 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) return; } - *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe; + skb_put_u8(hcp_skb, pipe); skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN; - memcpy(skb_put(hcp_skb, msg_len), - frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, - msg_len); + skb_put_data(hcp_skb, + frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, + msg_len); } skb_queue_purge(&hdev->rx_hcp_frags); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 401c7e255273..17e59a009ce6 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -160,7 +160,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, if (skb == NULL) return -ENOMEM; - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; + *(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; r = shdlc->xmit_to_drv(shdlc->hdev, skb); @@ -178,7 +178,7 @@ static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, pr_debug("uframe_modifier=%d\n", uframe_modifier); - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; + *(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; r = shdlc->xmit_to_drv(shdlc->hdev, skb); @@ -382,8 +382,8 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) if (skb == NULL) return -ENOMEM; - *skb_put(skb, 1) = SHDLC_MAX_WINDOW; - *skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 
1 : 0; + skb_put_u8(skb, SHDLC_MAX_WINDOW); + skb_put_u8(skb, SHDLC_SREJ_SUPPORT ? 1 : 0); return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } @@ -551,8 +551,8 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) skb = skb_dequeue(&shdlc->send_q); - *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) | - shdlc->nr; + *(u8 *)skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) | + shdlc->nr; pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns, shdlc->nr); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index c5959ce503e6..367d8c027101 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -298,7 +298,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu, pr_debug("header 0x%x 0x%x\n", header[0], header[1]); - memcpy(skb_put(pdu, LLCP_HEADER_SIZE), header, LLCP_HEADER_SIZE); + skb_put_data(pdu, header, LLCP_HEADER_SIZE); return pdu; } @@ -311,7 +311,7 @@ static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, if (tlv == NULL) return NULL; - memcpy(skb_put(pdu, tlv_length), tlv, tlv_length); + skb_put_data(pdu, tlv, tlv_length); return pdu; } @@ -549,7 +549,7 @@ int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, return PTR_ERR(skb); hlist_for_each_entry_safe(sdp, n, tlv_list, node) { - memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len); + skb_put_data(skb, sdp->tlv, sdp->tlv_len); hlist_del(&sdp->node); @@ -581,8 +581,7 @@ int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); - memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv, - sdreq->tlv_len); + skb_put_data(skb, sdreq->tlv, sdreq->tlv_len); hlist_del(&sdreq->node); @@ -622,7 +621,7 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) skb = llcp_add_header(skb, dsap, ssap, LLCP_PDU_DM); - memcpy(skb_put(skb, 1), &reason, 1); + skb_put_data(skb, &reason, 1); skb_queue_head(&local->tx_queue, skb); @@ -693,7 +692,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, skb_put(pdu, LLCP_SEQUENCE_SIZE); if (likely(frag_len > 0)) - memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); + skb_put_data(pdu, msg_ptr, frag_len); skb_queue_tail(&sock->tx_queue, pdu); @@ -759,7 +758,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); if (likely(frag_len > 0)) - memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); + skb_put_data(pdu, msg_ptr, frag_len); /* No need to check for the peer RW for UI frames */ skb_queue_tail(&local->tx_queue, pdu); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index e69786c6804c..02eef5cf3cce 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1390,7 +1390,7 @@ static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb) return; } - memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len); + skb_put_data(new_skb, skb->data, pdu_len); nfc_llcp_rx_skb(local, new_skb); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 2ffb18e73df6..fb7afcaa3004 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -77,7 +77,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, 
addr->sa_family); @@ -151,7 +152,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -662,8 +664,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); - if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) + if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC) return -EINVAL; if (addr->service_name_len == 0 && addr->dsap == 0) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 61fff422424f..c25e9b4179c3 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -73,11 +73,10 @@ int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, if (conn_info->dest_type == dest_type) { if (!params) return conn_info->conn_id; - if (conn_info) { - if (params->id == conn_info->dest_params->id && - params->protocol == conn_info->dest_params->protocol) - return conn_info->conn_id; - } + + if (params->id == conn_info->dest_params->id && + params->protocol == conn_info->dest_params->protocol) + return conn_info->conn_id; } } @@ -462,7 +461,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE); - memcpy(skb_put(skb, data_len), data, data_len); + skb_put_data(skb, data, data_len); loopback_data.conn_id = conn_id; loopback_data.data = skb; @@ -1173,8 +1172,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, return ndev; free_nfc: - kfree(ndev->nfc_dev); - + nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; @@ -1341,7 +1339,7 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) return -ENOMEM; } - hdr = (struct nci_ctrl_hdr *) skb_put(skb, NCI_CTRL_HDR_SIZE); + hdr = skb_put(skb, NCI_CTRL_HDR_SIZE); hdr->gid = nci_opcode_gid(opcode); hdr->oid = nci_opcode_oid(opcode); hdr->plen = plen; @@ -1350,7 +1348,7 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); if (plen) - memcpy(skb_put(skb, plen), payload, plen); + skb_put_data(skb, payload, plen); skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index dbd24254412a..908f25e3773e 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -81,7 +81,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, struct nci_data_hdr *hdr; int plen = skb->len; - hdr = (struct nci_data_hdr *) skb_push(skb, NCI_DATA_HDR_SIZE); + hdr = skb_push(skb, NCI_DATA_HDR_SIZE); hdr->conn_id = conn_id; hdr->rfu = 0; hdr->plen = plen; @@ -138,7 +138,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_reserve(skb_frag, NCI_DATA_HDR_SIZE); /* first, copy the data */ - memcpy(skb_put(skb_frag, frag_len), data, frag_len); + skb_put_data(skb_frag, data, frag_len); /* second, set the header */ nci_push_data_hdr(ndev, conn_id, skb_frag, diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index a0ab26d535dc..ddfc52ac1f9b 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -170,7 +170,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); - *skb_push(skb, 1) = data_type; + *(u8 
*)skb_push(skb, 1) = data_type; do { len = conn_info->max_pkt_payload_len; @@ -184,10 +184,10 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, len = conn_info->max_pkt_payload_len - skb->len - 1; } - *skb_push(skb, 1) = cb; + *(u8 *)skb_push(skb, 1) = cb; if (len > 0) - memcpy(skb_put(skb, len), data + i, len); + skb_put_data(skb, data + i, len); r = nci_send_data(ndev, conn_info->conn_id, skb); if (r < 0) @@ -472,12 +472,13 @@ void nci_hci_data_received_cb(void *context, return; } - *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; + skb_put_u8(hcp_skb, pipe); skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; - memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + - NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); + skb_put_data(hcp_skb, + frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, + msg_len); } skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index d904cd2f1442..452f4c16b7a9 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -86,8 +86,8 @@ int nci_spi_send(struct nci_spi *nspi, u16 crc; crc = crc_ccitt(CRC_INIT, skb->data, skb->len); - *skb_put(skb, 1) = crc >> 8; - *skb_put(skb, 1) = crc & 0xFF; + skb_put_u8(skb, crc >> 8); + skb_put_u8(skb, crc & 0xFF); } if (write_handshake_completion) { @@ -172,8 +172,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge) hdr[3] = 0; crc = crc_ccitt(CRC_INIT, skb->data, skb->len); - *skb_put(skb, 1) = crc >> 8; - *skb_put(skb, 1) = crc & 0xFF; + skb_put_u8(skb, crc >> 8); + skb_put_u8(skb, crc & 0xFF); ret = __nci_spi_send(nspi, skb, 0); @@ -238,8 +238,8 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi) goto receive_error; if (nspi->acknowledge_mode == NCI_SPI_CRC_ENABLED) { - *skb_push(skb, 1) = resp_hdr[1]; - *skb_push(skb, 1) = resp_hdr[0]; + *(u8 *)skb_push(skb, 1) = resp_hdr[1]; + *(u8 *)skb_push(skb, 1) = resp_hdr[0]; } return skb; diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index c468eabd6943..8d104c1db628 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -355,7 +355,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, /* Eat byte after byte till full packet header is received */ if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) { - *skb_put(nu->rx_skb, 1) = *data++; + skb_put_u8(nu->rx_skb, *data++); --count; continue; } @@ -371,7 +371,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, chunk_len = nu->rx_packet_len - nu->rx_skb->len; if (count < chunk_len) chunk_len = count; - memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len); + skb_put_data(nu->rx_skb, data, chunk_len); data += chunk_len; count -= chunk_len; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 6b0850e63e09..b251fb936a27 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -907,7 +907,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) u32 device_idx, target_idx, protocol; int rc; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX] || + !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index e386e6c90b17..e2188deb08dc 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -142,7 +142,7 @@ error: static int rawsock_add_header(struct sk_buff *skb) { - *skb_push(skb, NFC_HEADER_SIZE) = 0; + *(u8 *)skb_push(skb, 
NFC_HEADER_SIZE) = 0; return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7b17da9a94a0..45fe8c8a884d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -413,7 +413,7 @@ static void pad_packet(struct datapath *dp, struct sk_buff *skb) size_t plen = NLA_ALIGN(skb->len) - skb->len; if (plen > 0) - memset(skb_put(skb, plen), 0, plen); + skb_put_zero(skb, plen); } } @@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, /* Complete checksum if needed */ if (skb->ip_summed == CHECKSUM_PARTIAL && - (err = skb_checksum_help(skb))) + (err = skb_csum_hwoffload_help(skb, 0))) goto out; /* Older versions of OVS user space enforce alignment of the last @@ -1090,6 +1090,58 @@ static struct sw_flow_actions *get_flow_actions(struct net *net, return acts; } +/* Factor out match-init and action-copy to avoid a + * "-Wframe-larger-than=1024" warning. Because the mask is only + * used to get the actions, we add a new function to save some + * stack space. + * + * If neither the key nor the action attr is present, we return 0 + * directly; in that case the caller will not use the match + * either. If the action attr is present, we try to get the + * actions and save them to *acts. Before returning from the + * function, we reset the match->mask pointer, because we must + * not return a match object with a dangling reference to the + * mask. + */ +static int ovs_nla_init_match_and_action(struct net *net, + struct sw_flow_match *match, + struct sw_flow_key *key, + struct nlattr **a, + struct sw_flow_actions **acts, + bool log) +{ + struct sw_flow_mask mask; + int error = 0; + + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(match, key, true, &mask); + error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); + if (error) + goto error; + } + + if (a[OVS_FLOW_ATTR_ACTIONS]) { + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR(log, + "Flow key attribute not present in set flow."); + return -EINVAL; + } + + *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, + &mask, log); + if (IS_ERR(*acts)) { + error = PTR_ERR(*acts); + goto error; + } + } + + /* On success, error is 0. */ +error: + match->mask = NULL; + return error; +} + static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); @@ -1097,7 +1149,6 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sw_flow *flow; - struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; @@ -1109,34 +1160,18 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) bool ufid_present; ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); - if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, true, &mask); - error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], - a[OVS_FLOW_ATTR_MASK], log); - } else if (!ufid_present) { + if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) { OVS_NLERR(log, "Flow set message rejected, Key attribute missing."); - error = -EINVAL; + return -EINVAL; } + + error = ovs_nla_init_match_and_action(net, &match, &key, a, + &acts, log); if (error) goto error; - /* Validate actions. 
*/ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR(log, - "Flow key attribute not present in set flow."); - error = -EINVAL; - goto error; - } - - acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, - &mask, log); - if (IS_ERR(acts)) { - error = PTR_ERR(acts); - goto error; - } - + if (acts) { /* Can allocate before locking if have acts. */ reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, ufid_flags); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index da931bdef8a7..5d8dcd88815f 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -98,8 +98,8 @@ struct datapath { * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not - * @cutlen: The number of bytes from the packet end to be removed. * fragmented. + * @cutlen: The number of bytes from the packet end to be removed. */ struct ovs_skb_cb { struct vport *input_vport; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7e1d8a2afa63..f07d10ac35d8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2202,7 +2202,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (start < 0) return start; - tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL); + tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL, + GFP_KERNEL); + if (!tun_dst) return -ENOMEM; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 869acb3b3d3f..7e6301b2ec4d 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,14 +40,14 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; - if (vxlan->flags & VXLAN_F_GBP) { + if (vxlan->cfg.flags & VXLAN_F_GBP) { struct nlattr *exts; exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); if (!exts) return -EMSGSIZE; - if (vxlan->flags & VXLAN_F_GBP && + if (vxlan->cfg.flags & VXLAN_F_GBP && nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) return -EMSGSIZE; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e3eeed19cc7a..e3beb28203eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -188,7 +188,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) struct packet_sock; -static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); @@ -196,8 +195,7 @@ static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status); static void packet_increment_head(struct packet_ring_buffer *buff); -static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, - struct tpacket_block_desc *); +static int prb_curr_blk_in_use(struct tpacket_block_desc *); static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, struct packet_sock *); static void prb_retire_current_block(struct tpacket_kbdq_core *, @@ -721,7 +719,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) /* Case 1. Queue was frozen because user-space was * lagging behind. */ - if (prb_curr_blk_in_use(pkc, pbd)) { + if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. 
@@ -972,8 +970,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, } } -static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, - struct tpacket_block_desc *pbd) +static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) { return TP_STATUS_USER & BLOCK_STATUS(pbd); } @@ -1064,7 +1061,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po, * Check if that last block which caused the queue to freeze, * is still in_use by user-space. */ - if (prb_curr_blk_in_use(pkc, pbd)) { + if (prb_curr_blk_in_use(pbd)) { /* Can't record this packet */ return NULL; } else { @@ -1320,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); @@ -1742,7 +1739,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->flags = flags; INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); - atomic_set(&match->sk_ref, 0); + refcount_set(&match->sk_ref, 0); fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; @@ -1756,10 +1753,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; - if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { + if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; - atomic_inc(&match->sk_ref); + refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; } @@ -1788,7 +1785,7 @@ static struct packet_fanout *fanout_release(struct sock *sk) if (f) { po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) + if (refcount_dec_and_test(&f->sk_ref)) list_del(&f->list); else f = NULL; @@ -2526,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; - atomic_add(to_write, &po->sk.sk_wmem_alloc); + refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; @@ -4498,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), s->sk_type, ntohs(po->num), po->ifindex, diff --git a/net/packet/internal.h b/net/packet/internal.h index 9ee46314b7d7..94d1d405a116 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -1,6 +1,8 @@ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ +#include <linux/refcount.h> + struct packet_mclist { struct packet_mclist *next; int ifindex; @@ -86,7 +88,7 @@ struct packet_fanout { struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; spinlock_t lock; - atomic_t sk_ref; + refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 64634e3ec2fc..1b050dd17393 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, return POLLHUP; if (sk->sk_state == TCP_ESTABLISHED && - atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && + refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && 
atomic_read(&pn->tx_credits)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; @@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops)); } seq_pad(seq, '\n'); diff --git a/net/psample/psample.c b/net/psample/psample.c index 8aa58a918783..3a6ad0f438dc 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -264,7 +264,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, int nla_len = nla_total_size(data_len); struct nlattr *nla; - nla = (struct nlattr *)skb_put(nl_skb, nla_len); + nla = skb_put(nl_skb, nla_len); nla->nla_type = PSAMPLE_ATTR_DATA; nla->nla_len = nla_attr_size(data_len); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index a9a8c7d5a4a9..5586609afa27 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -111,6 +111,9 @@ struct qrtr_node { struct list_head item; }; +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); + /* Release node resources and free the node. * * Do not call directly, use qrtr_node_release. To be used with @@ -236,7 +239,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) return -ENOMEM; skb_reset_transport_header(skb); - memcpy(skb_put(skb, len), data, len); + skb_put_data(skb, data, len); skb_queue_tail(&node->rx_queue, skb); schedule_work(&node->work); @@ -245,23 +248,20 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -/* Allocate and construct a resume-tx packet. */ -static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, - u32 dst_node, u32 port) +static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, + u32 src_node, u32 dst_node) { - const int pkt_len = 20; struct qrtr_hdr *hdr; struct sk_buff *skb; - __le32 *buf; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); if (!skb) return NULL; skb_reset_transport_header(skb); - hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); + hdr = skb_put(skb, QRTR_HDR_SIZE); hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); + hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(src_node); hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); hdr->confirm_rx = cpu_to_le32(0); @@ -269,8 +269,23 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, hdr->dst_node_id = cpu_to_le32(dst_node); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); - buf = (__le32 *)skb_put(skb, pkt_len); - memset(buf, 0, pkt_len); + return skb; +} + +/* Allocate and construct a resume-tx packet. 
*/ +static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, + u32 dst_node, u32 port) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, + src_node, dst_node); + if (!skb) + return NULL; + + buf = skb_put_zero(skb, pkt_len); buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); buf[1] = cpu_to_le32(src_node); buf[2] = cpu_to_le32(port); @@ -278,6 +293,43 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, return skb; } +/* Allocate and construct a BYE message to signal remote termination */ +static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, + src_node, qrtr_local_nid); + if (!skb) + return NULL; + + buf = skb_put_zero(skb, pkt_len); + buf[0] = cpu_to_le32(QRTR_TYPE_BYE); + + return skb; +} + +static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) +{ + const int pkt_len = 20; + struct sk_buff *skb; + __le32 *buf; + + skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, + sq->sq_node, QRTR_NODE_BCAST); + if (!skb) + return NULL; + + buf = skb_put_zero(skb, pkt_len); + buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); + buf[1] = cpu_to_le32(sq->sq_node); + buf[2] = cpu_to_le32(sq->sq_port); + + return skb; +} + static struct qrtr_sock *qrtr_port_lookup(int port); static void qrtr_port_put(struct qrtr_sock *ipc); @@ -369,11 +421,17 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register); void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; + struct sk_buff *skb; mutex_lock(&node->ep_lock); node->ep = NULL; mutex_unlock(&node->ep_lock); + /* Notify the local controller about the event */ + skb = qrtr_alloc_local_bye(node->nid); + if (skb) + qrtr_local_enqueue(NULL, skb); + qrtr_node_release(node); ep->node = NULL; } @@ -408,8 +466,15 @@ static void qrtr_port_put(struct qrtr_sock *ipc) /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { + struct sk_buff *skb; int port = ipc->us.sq_port; + skb = qrtr_alloc_del_client(&ipc->us); + if (skb) { + skb_set_owner_w(skb, &ipc->sk); + qrtr_bcast_enqueue(NULL, skb); + } + if (port == QRTR_PORT_CTRL) port = 0; @@ -462,6 +527,26 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) return 0; } +/* Reset all non-control ports */ +static void qrtr_reset_ports(void) +{ + struct qrtr_sock *ipc; + int id; + + mutex_lock(&qrtr_port_lock); + idr_for_each_entry(&qrtr_ports, ipc, id) { + /* Don't reset control port */ + if (id == 0) + continue; + + sock_hold(&ipc->sk); + ipc->sk.sk_err = ENETRESET; + wake_up_interruptible(sk_sleep(&ipc->sk)); + sock_put(&ipc->sk); + } + mutex_unlock(&qrtr_port_lock); +} + /* Bind socket to address. * * Socket should be locked upon call. @@ -490,6 +575,10 @@ static int __qrtr_bind(struct socket *sock, sock_reset_flag(sk, SOCK_ZAPPED); + /* Notify all open ports about the new controller */ + if (port == QRTR_PORT_CTRL) + qrtr_reset_ports(); + return 0; } diff --git a/net/rds/connection.c b/net/rds/connection.c index 6a5ebdea7d2e..50a3789ac23e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn, cp->cp_conn = conn; atomic_set(&cp->cp_state, RDS_CONN_DOWN); cp->cp_send_gen = 0; - /* cp_outgoing is per-path. So we can only set it here - * for the single-path transports. - */ - if (!conn->c_trans->t_mp_capable) - cp->cp_outgoing = (is_outgoing ? 
1 : 0); cp->cp_reconnect_jiffies = 0; INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); @@ -417,6 +412,7 @@ void rds_conn_destroy(struct rds_connection *conn) "%pI4\n", conn, &conn->c_laddr, &conn->c_faddr); + conn->c_destroy_in_prog = 1; /* Ensure conn will not be scheduled for reconnect */ spin_lock_irq(&rds_conn_lock); hlist_del_init_rcu(&conn->c_hash_node); diff --git a/net/rds/ib.c b/net/rds/ib.c index 7a64c8db81ab..a0954ace3774 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -118,8 +118,8 @@ static void rds_ib_dev_free(struct work_struct *work) void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) { - BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); - if (atomic_dec_and_test(&rds_ibdev->refcount)) + BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); + if (refcount_dec_and_test(&rds_ibdev->refcount)) queue_work(rds_wq, &rds_ibdev->free_work); } @@ -137,7 +137,7 @@ static void rds_ib_add_one(struct ib_device *device) return; spin_lock_init(&rds_ibdev->spinlock); - atomic_set(&rds_ibdev->refcount, 1); + refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); rds_ibdev->max_wrs = device->attrs.max_qp_wr; @@ -205,10 +205,10 @@ static void rds_ib_add_one(struct ib_device *device) down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); - atomic_inc(&rds_ibdev->refcount); + refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); - atomic_inc(&rds_ibdev->refcount); + refcount_inc(&rds_ibdev->refcount); rds_ib_nodev_connect(); @@ -239,7 +239,7 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) rcu_read_lock(); rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (rds_ibdev) - atomic_inc(&rds_ibdev->refcount); + refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } diff --git a/net/rds/ib.h b/net/rds/ib.h index ec550626e221..bf4822407567 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -230,7 +230,7 @@ struct rds_ib_device { unsigned int max_initiator_depth; unsigned int max_responder_resources; spinlock_t spinlock; /* protect the above */ - atomic_t refcount; + refcount_t refcount; struct work_struct free_work; int *vector_load; }; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 977f69886c00..9a3c54e659e9 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -52,7 +52,7 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { - atomic_inc(&rds_ibdev->refcount); + refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } @@ -134,7 +134,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con spin_unlock_irq(&ib_nodev_conns_lock); ic->rds_ibdev = rds_ibdev; - atomic_inc(&rds_ibdev->refcount); + refcount_inc(&rds_ibdev->refcount); } void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) diff --git a/net/rds/message.c b/net/rds/message.c index 49bfb512d808..4318cc9b78f7 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -48,8 +48,8 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { void rds_message_addref(struct rds_message *rm) { - rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - atomic_inc(&rm->m_refcount); + rdsdebug("addref rm %p ref %d\n", rm, 
refcount_read(&rm->m_refcount)); + refcount_inc(&rm->m_refcount); } EXPORT_SYMBOL_GPL(rds_message_addref); @@ -83,9 +83,9 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { - rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); - if (atomic_dec_and_test(&rm->m_refcount)) { + rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); + WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); + if (refcount_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); rds_message_purge(rm); @@ -206,7 +206,7 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) rm->m_used_sgs = 0; rm->m_total_sgs = extra_len / sizeof(struct scatterlist); - atomic_set(&rm->m_refcount, 1); + refcount_set(&rm->m_refcount, 1); INIT_LIST_HEAD(&rm->m_sock_item); INIT_LIST_HEAD(&rm->m_conn_item); spin_lock_init(&rm->m_rs_lock); diff --git a/net/rds/rdma.c b/net/rds/rdma.c index f06fac4886b0..8886f15abe90 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, if (insert) { rb_link_node(&insert->r_rb_node, parent, p); rb_insert_color(&insert->r_rb_node, root); - atomic_inc(&insert->r_refcount); + refcount_inc(&insert->r_refcount); } return NULL; } @@ -99,7 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr) unsigned long flags; rdsdebug("RDS: destroy mr key is %x refcnt %u\n", - mr->r_key, atomic_read(&mr->r_refcount)); + mr->r_key, refcount_read(&mr->r_refcount)); if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) return; @@ -223,7 +223,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, goto out; } - atomic_set(&mr->r_refcount, 1); + refcount_set(&mr->r_refcount, 1); RB_CLEAR_NODE(&mr->r_rb_node); mr->r_trans = rs->rs_transport; mr->r_sock = rs; @@ -307,7 +307,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, rdsdebug("RDS: get_mr key is %x\n", mr->r_key); if (mr_ret) { - atomic_inc(&mr->r_refcount); + refcount_inc(&mr->r_refcount); *mr_ret = mr; } @@ -756,7 +756,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, if (!mr) err = -EINVAL; /* invalid r_key */ else - atomic_inc(&mr->r_refcount); + refcount_inc(&mr->r_refcount); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); if (mr) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 82d38ccf5e8b..516bcc89b46f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -8,6 +8,7 @@ #include <linux/mutex.h> #include <linux/rds.h> #include <linux/rhashtable.h> +#include <linux/refcount.h> #include "info.h" @@ -92,6 +93,8 @@ enum { #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ (rs)->rs_hash_initval) & ((n) - 1)) +#define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr)) + /* Per mpath connection state */ struct rds_conn_path { struct rds_connection *cp_conn; @@ -125,8 +128,6 @@ struct rds_conn_path { unsigned int cp_unacked_packets; unsigned int cp_unacked_bytes; - unsigned int cp_outgoing:1, - cp_pad_to_32:31; unsigned int cp_index; }; @@ -137,7 +138,8 @@ struct rds_connection { __be32 c_faddr; unsigned int c_loopback:1, c_ping_triggered:1, - c_pad_to_32:30; + c_destroy_in_prog:1, + c_pad_to_32:29; int c_npaths; struct rds_connection *c_passive; struct rds_transport *c_trans; @@ -260,7 +262,7 @@ struct rds_ext_header_rdma_dest { #define RDS_MSG_RX_CMSG 3 
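The RDS refcount conversions in the hunks above all follow the same mechanical pattern: atomic_set(..., 1) becomes refcount_set(..., 1), atomic_inc() becomes refcount_inc(), atomic_dec_and_test() becomes refcount_dec_and_test(), and diagnostic reads become refcount_read(). The payoff is that refcount_t saturates instead of wrapping and WARNs on an increment from zero. A minimal sketch of the resulting object lifecycle follows; the struct and function names are illustrative stand-ins, not RDS symbols:

#include <linux/refcount.h>

struct obj {
	refcount_t refcount;
	void (*free)(struct obj *obj);
};

static void obj_init(struct obj *obj)
{
	/* The creator holds the first reference. */
	refcount_set(&obj->refcount, 1);
}

static void obj_get(struct obj *obj)
{
	/* Saturates and WARNs rather than overflowing or reviving a freed object. */
	refcount_inc(&obj->refcount);
}

static void obj_put(struct obj *obj)
{
	/* Free exactly once, when the last reference is dropped. */
	if (refcount_dec_and_test(&obj->refcount))
		obj->free(obj);
}
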
struct rds_incoming { - atomic_t i_refcount; + refcount_t i_refcount; struct list_head i_item; struct rds_connection *i_conn; struct rds_conn_path *i_conn_path; @@ -275,7 +277,7 @@ struct rds_incoming { struct rds_mr { struct rb_node r_rb_node; - atomic_t r_refcount; + refcount_t r_refcount; u32 r_key; /* A copy of the creation flags */ @@ -354,7 +356,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) #define RDS_MSG_FLUSH 8 struct rds_message { - atomic_t m_refcount; + refcount_t m_refcount; struct list_head m_sock_item; struct list_head m_conn_item; struct rds_incoming m_inc; @@ -827,6 +829,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, is_acked_func is_acked); void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack, is_acked_func is_acked); +void rds_send_ping(struct rds_connection *conn, int cp_index); int rds_send_pong(struct rds_conn_path *cp, __be16 dport); /* rdma.c */ @@ -854,7 +857,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) { - if (atomic_dec_and_test(&mr->r_refcount)) + if (refcount_dec_and_test(&mr->r_refcount)) __rds_put_mr_final(mr); } diff --git a/net/rds/recv.c b/net/rds/recv.c index c70c32cb05f5..b25bcfe411ca 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -45,7 +45,7 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, { int i; - atomic_set(&inc->i_refcount, 1); + refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = conn; inc->i_saddr = saddr; @@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(rds_inc_init); void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, __be32 saddr) { - atomic_set(&inc->i_refcount, 1); + refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = cp->cp_conn; inc->i_conn_path = cp; @@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(rds_inc_path_init); static void rds_inc_addref(struct rds_incoming *inc) { - rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); - atomic_inc(&inc->i_refcount); + rdsdebug("addref inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); + refcount_inc(&inc->i_refcount); } void rds_inc_put(struct rds_incoming *inc) { - rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); - if (atomic_dec_and_test(&inc->i_refcount)) { + rdsdebug("put inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); + if (refcount_dec_and_test(&inc->i_refcount)) { BUG_ON(!list_empty(&inc->i_item)); inc->i_conn->c_trans->inc_free(inc); @@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, switch (type) { case RDS_EXTHDR_NPATHS: conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, - buffer.rds_npaths); + be16_to_cpu(buffer.rds_npaths)); break; case RDS_EXTHDR_GEN_NUM: - new_peer_gen_num = buffer.rds_gen_num; + new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num); break; default: pr_warn_ratelimited("ignoring unknown exthdr type " @@ -227,6 +227,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, } /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, conn->c_npaths, 1); + conn->c_ping_triggered = 0; rds_conn_peer_gen_update(conn, new_peer_gen_num); } @@ -244,8 +245,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, * called after reception of the probe-pong on all mprds_paths. * Otherwise (sender of probe-ping is not the smaller ip addr): just call * rds_conn_path_connect_if_down on the hashed path. 
(see rule 4) - * 4. when cp_index > 0, rds_connect_worker must only trigger - * a connection if laddr < faddr. + * 4. rds_connect_worker must only trigger a connection if laddr < faddr. * 5. sender may end up queuing the packet on the cp. will get sent out later. * when connection is completed. */ @@ -254,8 +254,9 @@ static void rds_start_mprds(struct rds_connection *conn) int i; struct rds_conn_path *cp; - if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) { - for (i = 1; i < conn->c_npaths; i++) { + if (conn->c_npaths > 1 && + IS_CANONICAL(conn->c_laddr, conn->c_faddr)) { + for (i = 0; i < conn->c_npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_connect_if_down(cp); } @@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); /* if this is a handshake ping, start multipath if necessary */ - if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) { + if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport), + be16_to_cpu(inc->i_hdr.h_dport))) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_start_mprds(cp->cp_conn); } goto out; } - if (inc->i_hdr.h_dport == RDS_FLAG_PROBE_PORT && + if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT && inc->i_hdr.h_sport == 0) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); /* if this is a handshake pong, start multipath if necessary */ diff --git a/net/rds/send.c b/net/rds/send.c index 5cc64039caf7..e81aa176f4e2 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -971,8 +971,6 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -static void rds_send_ping(struct rds_connection *conn); - static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) { int hash; @@ -982,7 +980,7 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) else hash = RDS_MPATH_HASH(rs, conn->c_npaths); if (conn->c_npaths == 0 && hash != 0) { - rds_send_ping(conn); + rds_send_ping(conn, 0); if (conn->c_npaths == 0) { wait_event_interruptible(conn->c_hs_waitq, @@ -1246,15 +1244,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, rm->m_inc.i_hdr.h_flags |= h_flags; cp->cp_next_tx_seq++; - if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) { - u16 npaths = RDS_MPATH_WORKERS; + if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) && + cp->cp_conn->c_trans->t_mp_capable) { + u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS); + u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS, &npaths, sizeof(npaths)); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_GEN_NUM, - &cp->cp_conn->c_my_gen_num, + &my_gen_num, sizeof(u32)); } spin_unlock_irqrestore(&cp->cp_lock, flags); @@ -1280,11 +1280,11 @@ rds_send_pong(struct rds_conn_path *cp, __be16 dport) return rds_send_probe(cp, 0, dport, 0); } -static void -rds_send_ping(struct rds_connection *conn) +void +rds_send_ping(struct rds_connection *conn, int cp_index) { unsigned long flags; - struct rds_conn_path *cp = &conn->c_path[0]; + struct rds_conn_path *cp = &conn->c_path[cp_index]; spin_lock_irqsave(&cp->cp_lock, flags); if (conn->c_ping_triggered) { @@ -1293,5 +1293,6 @@ rds_send_ping(struct rds_connection *conn) } conn->c_ping_triggered = 1; spin_unlock_irqrestore(&cp->cp_lock, flags); - rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0); + rds_send_probe(cp, cpu_to_be16(RDS_FLAG_PROBE_PORT), 0, 0); } 
+EXPORT_SYMBOL_GPL(rds_send_ping); diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 56ea6620fcf9..f8800b7ce79c 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -71,6 +71,7 @@ void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); +void rds_tcp_set_linger(struct socket *sock); /* tcp_recv.c */ int rds_tcp_recv_init(void); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index d6839d96d539..cbe08a1fa4c7 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk) * RDS connection as RDS_CONN_UP until the reconnect, * to avoid RDS datagram loss. */ - if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr && + if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { rds_conn_path_drop(cp); @@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); - cp->cp_outgoing = 1; rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; @@ -171,6 +170,8 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) cp->cp_conn, tc, sock); if (sock) { + if (cp->cp_conn->c_destroy_in_prog) + rds_tcp_set_linger(sock); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 507678853e6c..c6dc8caaf5ca 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -83,7 +83,7 @@ static struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) { int i; - bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); + bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr); int npaths = max_t(int, 1, conn->c_npaths); /* for mprds, all paths MUST be initiated by the peer @@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } +void rds_tcp_set_linger(struct socket *sock) +{ + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -171,30 +182,27 @@ int rds_tcp_accept_one(struct socket *sock) if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR) goto rst_nsk; if (rs_tcp->t_sock) { - /* Need to resolve a duelling SYN between peers. - * We have an outstanding SYN to this peer, which may - * potentially have transitioned to the RDS_CONN_UP state, - * so we must quiesce any send threads before resetting - * c_transport_data. 
- */ - if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || - !cp->cp_outgoing) { - goto rst_nsk; - } else { - rds_tcp_reset_callbacks(new_sock, cp); - cp->cp_outgoing = 0; - /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(cp, RDS_CONN_RESETTING); - } + /* Duelling SYN has been handled in rds_tcp_accept_one() */ + rds_tcp_reset_callbacks(new_sock, cp); + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(cp, RDS_CONN_RESETTING); } else { rds_tcp_set_callbacks(new_sock, cp); rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } new_sock = NULL; ret = 0; + if (conn->c_npaths == 0) + rds_send_ping(cp->cp_conn, cp->cp_index); goto out; rst_nsk: - /* reset the newly returned accept sock and bail */ + /* reset the newly returned accept sock and bail. + * It is safe to set linger on new_sock because the RDS connection + * has not been brought up on new_sock, so no RDS-level data could + * be pending on it. By setting linger, we achieve the side-effect + * of avoiding TIME_WAIT state on new_sock. + */ + rds_tcp_set_linger(new_sock); kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 52d11d7725c8..0d8616aa5bad 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); out: diff --git a/net/rds/threads.c b/net/rds/threads.c index 3e447d056d09..2852bc1d37d4 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) /* let peer with smaller addr initiate reconnect, to avoid duels */ if (conn->c_trans->t_type == RDS_TRANS_TCP && - conn->c_laddr > conn->c_faddr) + !IS_CANONICAL(conn->c_laddr, conn->c_faddr)) return; set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); @@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && + !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr)) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 76c01cbd56e3..41bd496531d4 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -81,8 +81,7 @@ static int rfkill_gpio_acpi_probe(struct device *dev, rfkill->type = (unsigned)id->driver_data; - return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), - acpi_rfkill_default_gpios); + return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios); } static int rfkill_gpio_probe(struct platform_device *pdev) @@ -154,8 +153,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev) rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); - acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev)); - return 0; } diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index b9da4d6b914f..9c68d2f8ba39 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -19,6 +19,7 @@ rxrpc-y := \ local_event.o \ local_object.o \ misc.o \ + net_ns.o \ output.o \ peer_event.o \ peer_object.o \ diff --git 
a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7fb59c3f1542..a2ad4482376f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -38,9 +38,6 @@ MODULE_PARM_DESC(debug, "RxRPC debugging mask"); static struct proto rxrpc_proto; static const struct proto_ops rxrpc_rpc_ops; -/* local epoch for detecting local-end reset */ -u32 rxrpc_epoch; - /* current debugging ID */ atomic_t rxrpc_debug_id; @@ -56,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *); */ static inline int rxrpc_writable(struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; + return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; } /* @@ -134,9 +131,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) { struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; - struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk); + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); u16 service_id = srx->srx_service; int ret; @@ -148,31 +144,48 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) lock_sock(&rx->sk); - if (rx->sk.sk_state != RXRPC_UNBOUND) { - ret = -EINVAL; - goto error_unlock; - } - - memcpy(&rx->srx, srx, sizeof(rx->srx)); + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + rx->srx = *srx; + local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx); + if (IS_ERR(local)) { + ret = PTR_ERR(local); + goto error_unlock; + } - local = rxrpc_lookup_local(&rx->srx); - if (IS_ERR(local)) { - ret = PTR_ERR(local); - goto error_unlock; - } + if (service_id) { + write_lock(&local->services_lock); + if (rcu_access_pointer(local->service)) + goto service_in_use; + rx->local = local; + rcu_assign_pointer(local->service, rx); + write_unlock(&local->services_lock); + + rx->sk.sk_state = RXRPC_SERVER_BOUND; + } else { + rx->local = local; + rx->sk.sk_state = RXRPC_CLIENT_BOUND; + } + break; - if (service_id) { - write_lock(&local->services_lock); - if (rcu_access_pointer(local->service)) - goto service_in_use; - rx->local = local; - rcu_assign_pointer(local->service, rx); - write_unlock(&local->services_lock); + case RXRPC_SERVER_BOUND: + ret = -EINVAL; + if (service_id == 0) + goto error_unlock; + ret = -EADDRINUSE; + if (service_id == rx->srx.srx_service) + goto error_unlock; + ret = -EINVAL; + srx->srx_service = rx->srx.srx_service; + if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0) + goto error_unlock; + rx->second_service = service_id; + rx->sk.sk_state = RXRPC_SERVER_BOUND2; + break; - rx->sk.sk_state = RXRPC_SERVER_BOUND; - } else { - rx->local = local; - rx->sk.sk_state = RXRPC_CLIENT_BOUND; + default: + ret = -EINVAL; + goto error_unlock; } release_sock(&rx->sk); @@ -209,6 +222,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) ret = -EADDRNOTAVAIL; break; case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: ASSERT(rx->local != NULL); max = READ_ONCE(rxrpc_max_backlog); ret = -EINVAL; @@ -248,6 +262,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @tx_total_len: Total length of data to transmit during the call (or -1) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @@ -262,6 +277,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long 
user_call_ID, + s64 tx_total_len, gfp_t gfp, rxrpc_notify_rx_t notify_rx) { @@ -289,7 +305,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.security_level = 0; cp.exclusive = false; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, + gfp); /* The socket has been unlocked. */ if (!IS_ERR(call)) call->notify_rx = notify_rx; @@ -434,7 +451,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) ret = -EAFNOSUPPORT; goto error_unlock; } - local = rxrpc_lookup_local(&rx->srx); + local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); goto error_unlock; @@ -476,6 +493,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); unsigned int min_sec_level; + u16 service_upgrade[2]; int ret; _enter(",%d,%d,,%d", level, optname, optlen); @@ -532,6 +550,28 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, rx->min_sec_level = min_sec_level; goto success; + case RXRPC_UPGRADEABLE_SERVICE: + ret = -EINVAL; + if (optlen != sizeof(service_upgrade) || + rx->service_upgrade.from != 0) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_SERVER_BOUND2) + goto error; + ret = -EFAULT; + if (copy_from_user(service_upgrade, optval, + sizeof(service_upgrade)) != 0) + goto error; + ret = -EINVAL; + if ((service_upgrade[0] != rx->srx.srx_service || + service_upgrade[1] != rx->second_service) && + (service_upgrade[0] != rx->second_service || + service_upgrade[1] != rx->srx.srx_service)) + goto error; + rx->service_upgrade.from = service_upgrade[0]; + rx->service_upgrade.to = service_upgrade[1]; + goto success; + default: break; } @@ -545,6 +585,34 @@ error: } /* + * Get socket options. 
+ */ +static int rxrpc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *_optlen) +{ + int optlen; + + if (level != SOL_RXRPC) + return -EOPNOTSUPP; + + if (get_user(optlen, _optlen)) + return -EFAULT; + + switch (optname) { + case RXRPC_SUPPORTED_CMSG: + if (optlen < sizeof(int)) + return -ETOOSMALL; + if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) || + put_user(sizeof(int), _optlen)) + return -EFAULT; + return 0; + + default: + return -EOPNOTSUPP; + } +} + +/* * permit an RxRPC socket to be polled */ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, @@ -582,9 +650,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, _enter("%p,%d", sock, protocol); - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - /* we support transport protocol UDP/UDP6 only */ if (protocol != PF_INET && IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) @@ -665,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -682,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); - _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* declare the socket closed for business */ sock_orphan(sk); @@ -750,7 +815,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .listen = rxrpc_listen, .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, - .getsockopt = sock_no_getsockopt, + .getsockopt = rxrpc_getsockopt, .sendmsg = rxrpc_sendmsg, .recvmsg = rxrpc_recvmsg, .mmap = sock_no_mmap, @@ -780,8 +845,6 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); - get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); - rxrpc_epoch |= RXRPC_RANDOM_EPOCH; get_random_bytes(&tmp, sizeof(tmp)); tmp &= 0x3fffffff; if (tmp == 0) @@ -809,6 +872,10 @@ static int __init af_rxrpc_init(void) goto error_security; } + ret = register_pernet_subsys(&rxrpc_net_ops); + if (ret) + goto error_pernet; + ret = proto_register(&rxrpc_proto, 1); if (ret < 0) { pr_crit("Cannot register protocol\n"); @@ -839,11 +906,6 @@ static int __init af_rxrpc_init(void) goto error_sysctls; } -#ifdef CONFIG_PROC_FS - proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); - proc_create("rxrpc_conns", 0, init_net.proc_net, - &rxrpc_connection_seq_fops); -#endif return 0; error_sysctls: @@ -855,6 +917,8 @@ error_key_type: error_sock: proto_unregister(&rxrpc_proto); error_proto: + unregister_pernet_subsys(&rxrpc_net_ops); +error_pernet: rxrpc_exit_security(); error_security: destroy_workqueue(rxrpc_workqueue); @@ -875,14 +939,16 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - rxrpc_destroy_all_calls(); - rxrpc_destroy_all_connections(); + unregister_pernet_subsys(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); - rxrpc_destroy_all_locals(); - remove_proc_entry("rxrpc_conns", init_net.proc_net); - remove_proc_entry("rxrpc_calls", init_net.proc_net); + /* Make sure the local and peer records pinned by any dying connections + * are released. 
+ */ + rcu_barrier(); + rxrpc_destroy_client_conn_ids(); + destroy_workqueue(rxrpc_workqueue); rxrpc_exit_security(); kmem_cache_destroy(rxrpc_call_jar); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7486926e60a8..69b97339ff9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -11,6 +11,8 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <rxrpc/packet.h> @@ -59,12 +61,44 @@ enum { RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */ RXRPC_CLIENT_BOUND, /* client local address bound */ RXRPC_SERVER_BOUND, /* server local address bound */ + RXRPC_SERVER_BOUND2, /* second server local address bound */ RXRPC_SERVER_LISTENING, /* server listening for connections */ RXRPC_SERVER_LISTEN_DISABLED, /* server listening disabled */ RXRPC_CLOSE, /* socket is being closed */ }; /* + * Per-network namespace data. + */ +struct rxrpc_net { + struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ + u32 epoch; /* Local epoch for detecting local-end reset */ + struct list_head calls; /* List of calls active in this namespace */ + rwlock_t call_lock; /* Lock for ->calls */ + + struct list_head conn_proc_list; /* List of conns in this namespace for proc */ + struct list_head service_conns; /* Service conns in this namespace */ + rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ + struct delayed_work service_conn_reaper; + + unsigned int nr_client_conns; + unsigned int nr_active_client_conns; + bool kill_all_client_conns; + spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ + spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ + struct list_head waiting_client_conns; + struct list_head active_client_conns; + struct list_head idle_client_conns; + struct delayed_work client_conn_reaper; + + struct list_head local_endpoints; + struct mutex local_mutex; /* Lock for ->local_endpoints */ + + spinlock_t peer_hash_lock; /* Lock for ->peer_hash */ + DECLARE_HASHTABLE (peer_hash, 10); +}; + +/* * Service backlog preallocation. 
* * This contains circular buffers of preallocated peers, connections and calls @@ -109,8 +143,14 @@ struct rxrpc_sock { u32 min_sec_level; /* minimum security level */ #define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT bool exclusive; /* Exclusive connection for a client socket */ + u16 second_service; /* Additional service bound to the endpoint */ + struct { + /* Service upgrade information */ + u16 from; /* Service ID to upgrade (if not 0) */ + u16 to; /* service ID to upgrade to */ + } service_upgrade; sa_family_t family; /* Protocol family created with */ - struct sockaddr_rxrpc srx; /* local address */ + struct sockaddr_rxrpc srx; /* Primary Service/local addresses */ struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */ }; @@ -211,6 +251,7 @@ struct rxrpc_security { struct rxrpc_local { struct rcu_head rcu; atomic_t usage; + struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; @@ -259,6 +300,8 @@ struct rxrpc_peer { u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ u8 rtt_cursor; /* next entry at which to insert */ u8 rtt_usage; /* amount of cache actually used */ + + u8 cong_cwnd; /* Congestion window size */ }; /* @@ -279,6 +322,7 @@ struct rxrpc_conn_parameters { struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ u16 service_id; /* Service ID for this connection */ u32 security_level; /* Security level selected */ }; @@ -293,6 +337,7 @@ enum rxrpc_conn_flag { RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ + RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ }; /* @@ -309,6 +354,7 @@ enum rxrpc_conn_cache_state { RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_UPGRADE, /* Conn is on active list, probing for upgrade */ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ RXRPC_CONN__NR_CACHE_STATES @@ -352,7 +398,6 @@ struct rxrpc_connection { u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ u8 last_type; /* Type of last packet */ - u16 last_service_id; union { u32 last_seq; u32 last_abort; @@ -383,6 +428,7 @@ struct rxrpc_connection { atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ + u16 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 security_size; /* security header size */ u8 security_ix; /* security type */ @@ -484,6 +530,7 @@ struct rxrpc_call { struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ + s64 tx_total_len; /* Total length left to be transmitted (or -1) */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long flags; @@ -601,7 +648,6 @@ struct rxrpc_ack_summary { * af_rxrpc.c */ extern atomic_t 
rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; -extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -634,15 +680,13 @@ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; -extern struct list_head rxrpc_calls; -extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - unsigned long, gfp_t); + unsigned long, s64, gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); @@ -653,7 +697,7 @@ void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); -void __exit rxrpc_destroy_all_calls(void); +void rxrpc_destroy_all_calls(struct rxrpc_net *); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) { @@ -773,7 +817,8 @@ int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); -void __exit rxrpc_destroy_all_client_connections(void); +void rxrpc_discard_expired_client_conns(struct work_struct *); +void rxrpc_destroy_all_client_connections(struct rxrpc_net *); /* * conn_event.c @@ -784,9 +829,6 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; -extern struct list_head rxrpc_connections; -extern struct list_head rxrpc_connection_proc_list; -extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); @@ -800,7 +842,8 @@ void rxrpc_see_connection(struct rxrpc_connection *); void rxrpc_get_connection(struct rxrpc_connection *); struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); void rxrpc_put_service_conn(struct rxrpc_connection *); -void __exit rxrpc_destroy_all_connections(void); +void rxrpc_service_connection_reaper(struct work_struct *); +void rxrpc_destroy_all_connections(struct rxrpc_net *); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) { @@ -828,8 +871,9 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); -void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_sock *, + struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* @@ -861,9 +905,9 @@ extern void rxrpc_process_local_events(struct rxrpc_local *); /* * local_object.c */ -struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *); +struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); void __rxrpc_put_local(struct 
rxrpc_local *); -void __exit rxrpc_destroy_all_locals(void); +void rxrpc_destroy_all_locals(struct rxrpc_net *); static inline void rxrpc_get_local(struct rxrpc_local *local) { @@ -902,6 +946,17 @@ extern unsigned int rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; /* + * net_ns.c + */ +extern unsigned int rxrpc_net_id; +extern struct pernet_operations rxrpc_net_ops; + +static inline struct rxrpc_net *rxrpc_net(struct net *net) +{ + return net_generic(net, rxrpc_net_id); +} + +/* * output.c */ int rxrpc_send_ack_packet(struct rxrpc_call *, bool); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 1752fcf8e8f1..dd30d74824b0 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -38,6 +38,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, { const void *here = __builtin_return_address(0); struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; @@ -79,7 +80,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_connection *conn; - conn = rxrpc_prealloc_service_connection(gfp); + conn = rxrpc_prealloc_service_connection(rxnet, gfp); if (!conn) return -ENOMEM; b->conn_backlog[head] = conn; @@ -136,9 +137,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); - write_lock(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); @@ -185,6 +186,7 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); unsigned int size = RXRPC_BACKLOG_MAX, head, tail; if (!b) @@ -209,10 +211,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->conn_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_connection *conn = b->conn_backlog[tail]; - write_lock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); list_del(&conn->link); list_del(&conn->proc_link); - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); kfree(conn); tail = (tail + 1) & (size - 1); } @@ -294,7 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, conn->params.local = local; conn->params.peer = peer; rxrpc_see_connection(conn); - rxrpc_new_incoming_connection(conn, skb); + rxrpc_new_incoming_connection(rx, conn, skb); } else { rxrpc_get_connection(conn); } @@ -308,6 +310,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; call->peer = rxrpc_get_peer(conn->params.peer); + call->cong_cwnd = call->peer->cong_cwnd; return call; } @@ -339,7 +342,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* Get the socket providing the service */ rx = rcu_dereference(local->service); - if (rx && service_id == rx->srx.srx_service) + if (rx && (service_id == rx->srx.srx_service || + service_id == rx->second_service)) goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 
47f7f4205653..d7809a0620b4 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -44,8 +44,6 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { }; struct kmem_cache *rxrpc_call_jar; -LIST_HEAD(rxrpc_calls); -DEFINE_RWLOCK(rxrpc_call_lock); static void rxrpc_call_timer_expired(unsigned long _call) { @@ -129,6 +127,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->tx_total_len = -1; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -137,12 +136,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->tx_winsize = 16; call->rx_expect_next = 1; - if (RXRPC_TX_SMSS > 2190) - call->cong_cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - call->cong_cwnd = 3; - else - call->cong_cwnd = 4; + call->cong_cwnd = 2; call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; @@ -203,10 +197,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, unsigned long user_call_ID, + s64 tx_total_len, gfp_t gfp) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; const void *here = __builtin_return_address(0); int ret; @@ -220,6 +216,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } + call->tx_total_len = tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)user_call_ID); @@ -255,9 +252,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); - write_lock(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -508,6 +505,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) */ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { + struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; @@ -520,9 +518,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - write_lock(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock(&rxrpc_call_lock); + if (!list_empty(&call->link)) { + rxnet = rxrpc_net(sock_net(&call->socket->sk)); + write_lock(&rxnet->call_lock); + list_del_init(&call->link); + write_unlock(&rxnet->call_lock); + } rxrpc_cleanup_call(call); } @@ -570,21 +571,23 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) } /* - * Make sure that all calls are gone. + * Make sure that all calls are gone from a network namespace. To reach this + * point, any open UDP sockets in that namespace must have been closed, so any + * outstanding calls cannot be doing I/O. 
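A note on the rxrpc_put_call() hunk above: list_del_init() leaves the list node self-linked, so a later test of list_empty() distinguishes "never added to rxnet->calls, or already removed" from "still listed", and the per-net lock is only taken when there is actually something to unlink. A minimal sketch of the idiom, with hypothetical demo_* names:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct demo_obj {
		struct list_head link;	/* self-linked after list_del_init() */
	};

	static void demo_unpublish(struct demo_obj *obj, rwlock_t *lock)
	{
		/* The unlocked emptiness check mirrors rxrpc_put_call():
		 * the object is already dead at this point, so nothing can
		 * be concurrently adding it back to the list.
		 */
		if (!list_empty(&obj->link)) {
			write_lock(lock);
			list_del_init(&obj->link);	/* idempotent */
			write_unlock(lock);
		}
	}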
*/ -void __exit rxrpc_destroy_all_calls(void) +void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) { struct rxrpc_call *call; _enter(""); - if (list_empty(&rxrpc_calls)) + if (list_empty(&rxnet->calls)) return; - write_lock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); - while (!list_empty(&rxrpc_calls)) { - call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); rxrpc_see_call(call); @@ -595,10 +598,10 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_call_states[call->state], call->flags, call->events); - write_unlock(&rxrpc_call_lock); + write_unlock(&rxnet->call_lock); cond_resched(); - write_lock(&rxrpc_call_lock); + write_lock(&rxnet->call_lock); } - write_unlock(&rxrpc_call_lock); + write_unlock(&rxnet->call_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index e8dea0d49e7f..eb2157680399 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -31,22 +31,25 @@ * may freely grant available channels to new calls and calls may be * waiting on it for channels to become available. * - * The connection is on the rxrpc_active_client_conns list which is kept + * The connection is on the rxnet->active_client_conns list which is kept * in activation order for culling purposes. * * rxrpc_nr_active_client_conns is held incremented also. * - * (4) CULLED - The connection got summarily culled to try and free up + * (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is + * being used to probe for service upgrade. + * + * (5) CULLED - The connection got summarily culled to try and free up * capacity. Calls currently in progress on the connection are allowed to * continue, but new calls will have to wait. There can be no waiters in * this state - the conn would have to go to the WAITING state instead. * - * (5) IDLE - The connection has no calls in progress upon it and must have + * (6) IDLE - The connection has no calls in progress upon it and must have * been exposed to the world (ie. the EXPOSED flag must be set). When it * expires, the EXPOSED flag is cleared and the connection transitions to * the INACTIVE state. * - * The connection is on the rxrpc_idle_client_conns list which is kept in + * The connection is on the rxnet->idle_client_conns list which is kept in * order of how soon they'll expire. * * There are flags of relevance to the cache: @@ -85,27 +88,13 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -static unsigned int rxrpc_nr_client_conns; -static unsigned int rxrpc_nr_active_client_conns; -static __read_mostly bool rxrpc_kill_all_client_conns; - -static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); -static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); -static LIST_HEAD(rxrpc_waiting_client_conns); -static LIST_HEAD(rxrpc_active_client_conns); -static LIST_HEAD(rxrpc_idle_client_conns); - /* * We use machine-unique IDs for our client connections. 
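For orientation, the numbered states in the comment above correspond to the connection cache-state enum in ar-internal.h; the names below are recapped from identifiers used elsewhere in this patch (the ordering and the CULLED spelling are assumptions, since that part of the header is not shown in this section):

	enum rxrpc_conn_cache_state {
		RXRPC_CONN_CLIENT_INACTIVE,	/* (1) not on any list */
		RXRPC_CONN_CLIENT_WAITING,	/* (2) rxnet->waiting_client_conns */
		RXRPC_CONN_CLIENT_ACTIVE,	/* (3) rxnet->active_client_conns */
		RXRPC_CONN_CLIENT_UPGRADE,	/* (4) active, probing for upgrade */
		RXRPC_CONN_CLIENT_CULLED,	/* (5) evicted, draining, no waiters */
		RXRPC_CONN_CLIENT_IDLE,		/* (6) rxnet->idle_client_conns */
	};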
*/ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); -static void rxrpc_cull_active_client_conns(void); -static void rxrpc_discard_expired_client_conns(struct work_struct *); - -static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, - rxrpc_discard_expired_client_conns); +static void rxrpc_cull_active_client_conns(struct rxrpc_net *); /* * Get a connection ID and epoch for a client connection from the global pool. @@ -116,6 +105,7 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; int id; _enter(""); @@ -131,7 +121,7 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, spin_unlock(&rxrpc_conn_id_lock); idr_preload_end(); - conn->proto.epoch = rxrpc_epoch; + conn->proto.epoch = rxnet->epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); _leave(" [CID %x]", conn->proto.cid); @@ -183,6 +173,7 @@ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = cp->local->rxnet; int ret; _enter(""); @@ -196,10 +187,13 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) atomic_set(&conn->usage, 1); if (cp->exclusive) __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + if (cp->upgrade) + __set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; + conn->service_id = cp->service_id; ret = rxrpc_get_client_connection_id(conn, gfp); if (ret < 0) @@ -213,9 +207,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) if (ret < 0) goto error_2; - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); /* We steal the caller's peer ref. */ cp->peer = NULL; @@ -243,12 +237,13 @@ error_0: */ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; int id_cursor, id, distance, limit; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->proto.epoch != rxrpc_epoch) + if (conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; /* The IDR tree gets very expensive on memory if the connection IDs are @@ -297,6 +292,12 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (!cp->peer) goto error; + call->cong_cwnd = cp->peer->cong_cwnd; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; + /* If the connection is not meant to be exclusive, search the available * connections to see if the connection we want to use already exists. 
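Two hunks cooperate in the congestion setup above: rxrpc_alloc_call() stops deriving the initial window from RXRPC_TX_SMSS (defaulting to 2), and rxrpc_get_client_conn() instead inherits the peer's running estimate, which rxrpc_alloc_peer() seeds later in this diff using RFC 5681's MSS-laddered initial window. Consolidated into a sketch (types per ar-internal.h assumed):

	static void demo_seed_peer_cwnd(struct rxrpc_peer *peer)
	{
		/* RFC 5681: the bigger the segment, the smaller the
		 * initial window in segments */
		if (RXRPC_TX_SMSS > 2190)
			peer->cong_cwnd = 2;
		else if (RXRPC_TX_SMSS > 1095)
			peer->cong_cwnd = 3;
		else
			peer->cong_cwnd = 4;
	}

	static void demo_inherit_cwnd(struct rxrpc_call *call,
				      const struct rxrpc_peer *peer)
	{
		call->cong_cwnd = peer->cong_cwnd;
		call->cong_mode = call->cong_cwnd >= call->cong_ssthresh ?
				  RXRPC_CALL_CONGEST_AVOIDANCE :
				  RXRPC_CALL_SLOW_START;
	}

The effect is that a peer's learned window now survives individual calls: rxrpc_disconnect_call(), further down in conn_object.c, writes call->cong_cwnd back to the peer.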
*/ @@ -310,7 +311,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, #define cmp(X) ((long)conn->params.X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: - cmp(security_level)); + cmp(security_level) ?: + cmp(upgrade)); #undef cmp if (diff < 0) { p = p->rb_left; @@ -354,6 +356,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (cp->exclusive) { call->conn = candidate; call->security_ix = candidate->security_ix; + call->service_id = candidate->service_id; _leave(" = 0 [exclusive %d]", candidate->debug_id); return 0; } @@ -374,7 +377,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, #define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: - cmp(security_level)); + cmp(security_level) ?: + cmp(upgrade)); #undef cmp if (diff < 0) { pp = &(*pp)->rb_left; @@ -403,6 +407,7 @@ candidate_published: set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); call->conn = candidate; call->security_ix = candidate->security_ix; + call->service_id = candidate->service_id; spin_unlock(&local->client_conns_lock); _leave(" = 0 [new %d]", candidate->debug_id); return 0; @@ -424,6 +429,7 @@ found_extant_conn: spin_lock(&conn->channel_lock); call->conn = conn; call->security_ix = conn->security_ix; + call->service_id = conn->service_id; list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); @@ -440,12 +446,18 @@ error: /* * Activate a connection. */ -static void rxrpc_activate_conn(struct rxrpc_connection *conn) +static void rxrpc_activate_conn(struct rxrpc_net *rxnet, + struct rxrpc_connection *conn) { - trace_rxrpc_client(conn, -1, rxrpc_client_to_active); - conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; - rxrpc_nr_active_client_conns++; - list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade); + conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE; + } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + } + rxnet->nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxnet->active_client_conns); } /* @@ -460,25 +472,28 @@ static void rxrpc_activate_conn(struct rxrpc_connection *conn) * channels if it has been culled to make space and then re-requested by a new * call. 
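The rb-tree comparators above chain subtractions with GCC's binary ?: extension, where a ?: b evaluates to a whenever a is non-zero, so the expression yields the first field that differs; appending cmp(upgrade) keeps upgrade-probing connections in tree slots distinct from ordinary ones, so the two are never conflated on reuse. The idiom in isolation, with a hypothetical key type:

	struct demo_key { long peer, key, level, upgrade; };

	/* <0, 0 or >0 according to the first differing field */
	static long demo_cmp(const struct demo_key *a, const struct demo_key *b)
	{
		return (a->peer - b->peer) ?:
		       (a->key - b->key) ?:
		       (a->level - b->level) ?:
		       (a->upgrade - b->upgrade);
	}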
*/ -static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet, + struct rxrpc_connection *conn) { unsigned int nr_conns; _enter("%d,%d", conn->debug_id, conn->cache_state); - if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE || + conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE) goto out; - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); - nr_conns = rxrpc_nr_client_conns; + nr_conns = rxnet->nr_client_conns; if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { trace_rxrpc_client(conn, -1, rxrpc_client_count); - rxrpc_nr_client_conns = nr_conns + 1; + rxnet->nr_client_conns = nr_conns + 1; } switch (conn->cache_state) { case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_UPGRADE: case RXRPC_CONN_CLIENT_WAITING: break; @@ -494,21 +509,21 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) } out_unlock: - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); out: _leave(" [%d]", conn->cache_state); return; activate_conn: _debug("activate"); - rxrpc_activate_conn(conn); + rxrpc_activate_conn(rxnet, conn); goto out_unlock; wait_for_capacity: _debug("wait"); trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; - list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns); goto out_unlock; } @@ -582,6 +597,9 @@ static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) case RXRPC_CONN_CLIENT_ACTIVE: mask = RXRPC_ACTIVE_CHANS_MASK; break; + case RXRPC_CONN_CLIENT_UPGRADE: + mask = 0x01; + break; default: return; } @@ -660,18 +678,19 @@ int rxrpc_connect_call(struct rxrpc_call *call, struct sockaddr_rxrpc *srx, gfp_t gfp) { + struct rxrpc_net *rxnet = cp->local->rxnet; int ret; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(NULL); - rxrpc_cull_active_client_conns(); + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); + rxrpc_cull_active_client_conns(rxnet); ret = rxrpc_get_client_conn(call, cp, srx, gfp); if (ret < 0) return ret; - rxrpc_animate_client_conn(call->conn); + rxrpc_animate_client_conn(rxnet, call->conn); rxrpc_activate_channels(call->conn); ret = rxrpc_wait_for_channel(call, gfp); @@ -729,6 +748,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk)); trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; @@ -750,7 +770,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) /* We must deactivate or idle the connection if it's now * waiting for nothing. */ - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && list_empty(&conn->waiting_calls) && !conn->active_chans) @@ -787,14 +807,23 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) * list. It might even get moved back to the active list whilst we're * waiting for the lock. 
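The mask in rxrpc_activate_channels_locked() above is the point of the UPGRADE state: an rxrpc connection multiplexes up to four call channels, and while the upgrade probe is outstanding the mask 0x01 confines activation to channel 0, whereas ACTIVE presumably uses the full four-channel RXRPC_ACTIVE_CHANS_MASK (its definition is not shown in this hunk). Masked channel selection, sketched:

	#include <linux/bitops.h>

	static int demo_pick_channel(unsigned long active_chans,
				     unsigned long mask)
	{
		unsigned long avail = ~active_chans & mask;

		/* lowest permitted free channel, or -1 if none */
		return avail ? __ffs(avail) : -1;
	}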
*/ - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_UPGRADE: + /* Deal with termination of a service upgrade probe. */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); + trace_rxrpc_client(conn, channel, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_activate_channels_locked(conn); + } + /* fall through */ case RXRPC_CONN_CLIENT_ACTIVE: if (list_empty(&conn->waiting_calls)) { rxrpc_deactivate_one_channel(conn, channel); if (!conn->active_chans) { - rxrpc_nr_active_client_conns--; + rxnet->nr_active_client_conns--; goto idle_connection; } goto out; @@ -820,7 +849,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) } out: - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); rxrpc_put_connection(conn); @@ -835,11 +864,11 @@ idle_connection: trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; conn->cache_state = RXRPC_CONN_CLIENT_IDLE; - list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); - if (rxrpc_idle_client_conns.next == &conn->cache_link && - !rxrpc_kill_all_client_conns) + list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); + if (rxnet->idle_client_conns.next == &conn->cache_link && + !rxnet->kill_all_client_conns) queue_delayed_work(rxrpc_workqueue, - &rxrpc_client_conn_reap, + &rxnet->client_conn_reaper, rxrpc_conn_idle_client_expiry); } else { trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); @@ -857,6 +886,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; + struct rxrpc_net *rxnet = local->rxnet; unsigned int nr_conns; trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); @@ -875,18 +905,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { trace_rxrpc_client(conn, -1, rxrpc_client_uncount); - spin_lock(&rxrpc_client_conn_cache_lock); - nr_conns = --rxrpc_nr_client_conns; + spin_lock(&rxnet->client_conn_cache_lock); + nr_conns = --rxnet->nr_client_conns; if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxrpc_waiting_client_conns)) { - next = list_entry(rxrpc_waiting_client_conns.next, + !list_empty(&rxnet->waiting_client_conns)) { + next = list_entry(rxnet->waiting_client_conns.next, struct rxrpc_connection, cache_link); rxrpc_get_connection(next); - rxrpc_activate_conn(next); + rxrpc_activate_conn(rxnet, next); } - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); } rxrpc_kill_connection(conn); @@ -921,10 +951,10 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn) /* * Kill the longest-active client connections to make room for new ones. 
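Worth spelling out in the idle transition above: rxnet->idle_client_conns is kept in expiry order, so the delayed reaper only needs (re)arming when the connection just parked became the new list head; a timer queued for the head covers everything behind it, and kill_all_client_conns suppresses rearming during namespace teardown. Recapping the hunk:

	/* arm the reaper only when this conn became the idle-list head */
	if (rxnet->idle_client_conns.next == &conn->cache_link &&
	    !rxnet->kill_all_client_conns)
		queue_delayed_work(rxrpc_workqueue,
				   &rxnet->client_conn_reaper,
				   rxrpc_conn_idle_client_expiry);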
*/ -static void rxrpc_cull_active_client_conns(void) +static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn; - unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_conns = rxnet->nr_client_conns; unsigned int nr_active, limit; _enter(""); @@ -936,14 +966,15 @@ static void rxrpc_cull_active_client_conns(void) } limit = rxrpc_reap_client_connections; - spin_lock(&rxrpc_client_conn_cache_lock); - nr_active = rxrpc_nr_active_client_conns; + spin_lock(&rxnet->client_conn_cache_lock); + nr_active = rxnet->nr_active_client_conns; while (nr_active > limit) { - ASSERT(!list_empty(&rxrpc_active_client_conns)); - conn = list_entry(rxrpc_active_client_conns.next, + ASSERT(!list_empty(&rxnet->active_client_conns)); + conn = list_entry(rxnet->active_client_conns.next, struct rxrpc_connection, cache_link); - ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE, + conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE); if (list_empty(&conn->waiting_calls)) { trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); @@ -953,14 +984,14 @@ static void rxrpc_cull_active_client_conns(void) trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, - &rxrpc_waiting_client_conns); + &rxnet->waiting_client_conns); } nr_active--; } - rxrpc_nr_active_client_conns = nr_active; - spin_unlock(&rxrpc_client_conn_cache_lock); + rxnet->nr_active_client_conns = nr_active; + spin_unlock(&rxnet->client_conn_cache_lock); ASSERTCMP(nr_active, >=, 0); _leave(" [culled]"); } @@ -972,22 +1003,25 @@ static void rxrpc_cull_active_client_conns(void) * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ -static void rxrpc_discard_expired_client_conns(struct work_struct *work) +void rxrpc_discard_expired_client_conns(struct work_struct *work) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = + container_of(to_delayed_work(work), + struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; bool did_discard = false; - _enter("%c", work ? 'w' : 'n'); + _enter(""); - if (list_empty(&rxrpc_idle_client_conns)) { + if (list_empty(&rxnet->idle_client_conns)) { _leave(" [empty]"); return; } /* Don't double up on the discarding */ - if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + if (!spin_trylock(&rxnet->client_conn_discard_lock)) { _leave(" [already]"); return; } @@ -995,19 +1029,19 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *work) /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ - nr_conns = rxrpc_nr_client_conns; + nr_conns = rxnet->nr_client_conns; next: - spin_lock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); - if (list_empty(&rxrpc_idle_client_conns)) + if (list_empty(&rxnet->idle_client_conns)) goto out; - conn = list_entry(rxrpc_idle_client_conns.next, + conn = list_entry(rxnet->idle_client_conns.next, struct rxrpc_connection, cache_link); ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); - if (!rxrpc_kill_all_client_conns) { + if (!rxnet->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. 
We must, * however, have at least a short grace period to be able to do @@ -1030,7 +1064,7 @@ next: conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_cache_lock); /* When we cleared the EXPOSED flag, we took on responsibility for the * reference that that had on the usage count. We deal with that here. @@ -1050,14 +1084,14 @@ not_yet_expired: * then things get messier. */ _debug("not yet"); - if (!rxrpc_kill_all_client_conns) + if (!rxnet->kill_all_client_conns) queue_delayed_work(rxrpc_workqueue, - &rxrpc_client_conn_reap, + &rxnet->client_conn_reaper, conn_expires_at - now); out: - spin_unlock(&rxrpc_client_conn_cache_lock); - spin_unlock(&rxrpc_client_conn_discard_mutex); + spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_discard_lock); _leave(""); } @@ -1065,17 +1099,17 @@ out: * Preemptively destroy all the client connection records rather than waiting * for them to time out */ -void __exit rxrpc_destroy_all_client_connections(void) +void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) { _enter(""); - spin_lock(&rxrpc_client_conn_cache_lock); - rxrpc_kill_all_client_conns = true; - spin_unlock(&rxrpc_client_conn_cache_lock); + spin_lock(&rxnet->client_conn_cache_lock); + rxnet->kill_all_client_conns = true; + spin_unlock(&rxnet->client_conn_cache_lock); - cancel_delayed_work(&rxrpc_client_conn_reap); + cancel_delayed_work(&rxnet->client_conn_reaper); - if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) _debug("destroy: queue failed"); _leave(""); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 46babcf82ce8..59a51a56e7c8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -74,7 +74,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.whdr.userStatus = 0; pkt.whdr.securityIndex = conn->security_ix; pkt.whdr._rsvd = 0; - pkt.whdr.serviceId = htons(chan->last_service_id); + pkt.whdr.serviceId = htons(conn->service_id); len = sizeof(pkt.whdr); switch (chan->last_type) { @@ -208,7 +208,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr.userStatus = 0; whdr.securityIndex = conn->security_ix; whdr._rsvd = 0; - whdr.serviceId = htons(conn->params.service_id); + whdr.serviceId = htons(conn->service_id); word = htonl(conn->local_abort); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b0ecb770fdce..929b50d5afe8 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -22,13 +22,6 @@ */ unsigned int rxrpc_connection_expiry = 10 * 60; -static void rxrpc_connection_reaper(struct work_struct *work); - -LIST_HEAD(rxrpc_connections); -LIST_HEAD(rxrpc_connection_proc_list); -DEFINE_RWLOCK(rxrpc_connection_lock); -static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); - static void rxrpc_destroy_connection(struct rcu_head *); /* @@ -174,7 +167,6 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, * through the channel, whilst disposing of the actual call record. 
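With the file-scope reap work items gone, both reapers now recover their namespace from the work pointer itself: the delayed_work is embedded in struct rxrpc_net, so container_of(to_delayed_work(work), ...) walks back to the containing per-net record, as in rxrpc_discard_expired_client_conns() above and the service reaper below. The pattern in miniature, with hypothetical demo_* names:

	#include <linux/workqueue.h>

	struct demo_net {
		struct delayed_work reaper;
		/* ... per-namespace lists and locks ... */
	};

	static void demo_reap(struct work_struct *work)
	{
		struct demo_net *dn = container_of(to_delayed_work(work),
						   struct demo_net, reaper);

		/* operate on dn with no global state at all */
	}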
*/ trace_rxrpc_disconnect_call(call); - chan->last_service_id = call->service_id; if (call->abort_code) { chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; @@ -201,6 +193,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + call->peer->cong_cwnd = call->cong_cwnd; + spin_lock_bh(&conn->params.peer->lock); hlist_del_init(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); @@ -222,15 +216,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) */ void rxrpc_kill_connection(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet = conn->params.local->rxnet; + ASSERT(!rcu_access_pointer(conn->channels[0].call) && !rcu_access_pointer(conn->channels[1].call) && !rcu_access_pointer(conn->channels[2].call) && !rcu_access_pointer(conn->channels[3].call)); ASSERT(list_empty(&conn->cache_link)); - write_lock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); list_del_init(&conn->proc_link); - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); /* Drain the Rx queue. Note that even though we've unpublished, an * incoming packet could still be being added to our Rx queue, so we @@ -309,14 +305,17 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) */ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { + struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + if (n == 0) { + rxnet = conn->params.local->rxnet; + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); + } } /* @@ -348,9 +347,12 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) /* * reap dead service connections */ -static void rxrpc_connection_reaper(struct work_struct *work) +void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; + struct rxrpc_net *rxnet = + container_of(to_delayed_work(work), + struct rxrpc_net, service_conn_reaper); unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); @@ -361,8 +363,8 @@ static void rxrpc_connection_reaper(struct work_struct *work) reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; - write_lock(&rxrpc_connection_lock); - list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) { + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; @@ -393,12 +395,12 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_move_tail(&conn->link, &graveyard); } - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, + rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, earliest - now); } @@ -418,36 +420,30 @@ static void rxrpc_connection_reaper(struct work_struct *work) * preemptively destroy all the service connection records rather than * waiting for them to time out */ -void __exit rxrpc_destroy_all_connections(void) +void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn, *_p; bool leak = false; _enter(""); - rxrpc_destroy_all_client_connections(); + 
rxrpc_destroy_all_client_connections(rxnet); rxrpc_connection_expiry = 0; - cancel_delayed_work(&rxrpc_connection_reap); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + cancel_delayed_work(&rxnet->client_conn_reaper); + rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); - write_lock(&rxrpc_connection_lock); - list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) { + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", conn, atomic_read(&conn->usage)); leak = true; } - write_unlock(&rxrpc_connection_lock); + write_unlock(&rxnet->conn_lock); BUG_ON(leak); - ASSERT(list_empty(&rxrpc_connection_proc_list)); - - /* Make sure the local and peer records pinned by any dying connections - * are released. - */ - rcu_barrier(); - rxrpc_destroy_client_conn_ids(); + ASSERT(list_empty(&rxnet->conn_proc_list)); _leave(""); } diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index eef551f40dc2..e60fcd2a4a02 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -121,7 +121,8 @@ replace_old_connection: * Preallocate a service connection. The connection is placed on the proc and * reap lists so that we don't have to get the lock from BH context. */ -struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet, + gfp_t gfp) { struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); @@ -132,10 +133,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) conn->state = RXRPC_CONN_SERVICE_PREALLOC; atomic_set(&conn->usage, 2); - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->link, &rxnet->service_conns); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); trace_rxrpc_conn(conn, rxrpc_conn_new_service, atomic_read(&conn->usage), @@ -149,7 +150,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) * Set up an incoming connection. This is called in BH context with the RCU * read lock held. */ -void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, +void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -159,6 +161,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; conn->params.service_id = sp->hdr.serviceId; + conn->service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; if (conn->security_ix) @@ -166,6 +169,14 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, else conn->state = RXRPC_CONN_SERVICE; + /* See if we should upgrade the service. This can only happen on the + * first packet on a new connection. Once done, it applies to all + * subsequent calls on that connection. + */ + if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE && + conn->service_id == rx->service_upgrade.from) + conn->service_id = rx->service_upgrade.to; + /* Make the connection a target for incoming packets. 
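The hunk just above is the server half of the service-upgrade handshake; the client half appears in the output.c hunk further down, which tags the first DATA packet of a probe call. Side by side, both recapped from this diff:

	/* client (output.c): flag the first DATA packet of the probe */
	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    sp->hdr.seq == 1)
		whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;

	/* server (conn_service.c): honour it, once, for the connection */
	if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
	    conn->service_id == rx->service_upgrade.from)
		conn->service_id = rx->service_upgrade.to;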
*/ rxrpc_publish_service_conn(conn->params.peer, conn); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 45dba732a3b4..e56e23ed2229 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1142,6 +1142,13 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; + if (sp->hdr.serviceId != conn->service_id) { + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || + conn->service_id != conn->params.service_id) + goto reupgrade; + conn->service_id = sp->hdr.serviceId; + } + if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); @@ -1194,6 +1201,9 @@ void rxrpc_data_ready(struct sock *udp_sk) rxrpc_input_implicit_end_call(conn, call); call = NULL; } + + if (call && sp->hdr.serviceId != call->service_id) + call->service_id = sp->hdr.serviceId; } else { skew = 0; call = NULL; @@ -1237,11 +1247,18 @@ wrong_security: skb->priority = RXKADINCONSISTENCY; goto post_abort; +reupgrade: + rcu_read_unlock(); + trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); + goto protocol_error; + bad_message_unlock: rcu_read_unlock(); bad_message: trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index ff4864d550b8..38b99db30e54 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -25,9 +25,6 @@ static void rxrpc_local_processor(struct work_struct *); static void rxrpc_local_rcu(struct rcu_head *); -static DEFINE_MUTEX(rxrpc_local_mutex); -static LIST_HEAD(rxrpc_local_endpoints); - /* * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, * same or greater than. @@ -77,13 +74,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, /* * Allocate a new local endpoint. 
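The serviceId check added to rxrpc_data_ready() in the input.c hunk above enforces that a connection's service ID may change at most once, and only while the upgrade probe is live; any other change earns the new "UPG" abort. Restated as a predicate (hypothetical helper name, types per ar-internal.h):

	static bool demo_service_id_ok(const struct rxrpc_connection *conn,
				       u16 service_id)
	{
		if (service_id == conn->service_id)
			return true;	/* unchanged is always fine */

		/* a change is legal only while probing and before any
		 * earlier change diverged us from the requested ID */
		return test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
		       conn->service_id == conn->params.service_id;
	}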
*/ -static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) +static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, + const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { atomic_set(&local->usage, 1); + local->rxnet = rxnet; INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); @@ -95,6 +94,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); + local->srx.srx_service = 0; } _leave(" = %p", local); @@ -105,7 +105,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) * create the local socket * - must be called with rxrpc_local_mutex locked */ -static int rxrpc_open_socket(struct rxrpc_local *local) +static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { struct sock *sock; int ret, opt; @@ -114,7 +114,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local) local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, local->srx.transport.family, + ret = sock_create_kern(net, local->srx.transport.family, local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); @@ -172,9 +172,11 @@ error: /* * Look up or create a new local endpoint using the specified local address. */ -struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) +struct rxrpc_local *rxrpc_lookup_local(struct net *net, + const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; + struct rxrpc_net *rxnet = rxrpc_net(net); struct list_head *cursor; const char *age; long diff; @@ -183,10 +185,10 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) _enter("{%d,%d,%pISp}", srx->transport_type, srx->transport.family, &srx->transport); - mutex_lock(&rxrpc_local_mutex); + mutex_lock(&rxnet->local_mutex); - for (cursor = rxrpc_local_endpoints.next; - cursor != &rxrpc_local_endpoints; + for (cursor = rxnet->local_endpoints.next; + cursor != &rxnet->local_endpoints; cursor = cursor->next) { local = list_entry(cursor, struct rxrpc_local, link); @@ -220,11 +222,11 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) goto found; } - local = rxrpc_alloc_local(srx); + local = rxrpc_alloc_local(rxnet, srx); if (!local) goto nomem; - ret = rxrpc_open_socket(local); + ret = rxrpc_open_socket(local, net); if (ret < 0) goto sock_error; @@ -232,7 +234,7 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) age = "new"; found: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); _net("LOCAL %s %d {%pISp}", age, local->debug_id, &local->srx.transport); @@ -243,13 +245,13 @@ found: nomem: ret = -ENOMEM; sock_error: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); kfree(local); _leave(" = %d", ret); return ERR_PTR(ret); addr_in_use: - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); _leave(" = -EADDRINUSE"); return ERR_PTR(-EADDRINUSE); } @@ -273,6 +275,7 @@ void __rxrpc_put_local(struct rxrpc_local *local) static void rxrpc_local_destroyer(struct rxrpc_local *local) { struct socket *socket = local->socket; + struct rxrpc_net *rxnet = local->rxnet; _enter("%d", local->debug_id); @@ -286,9 
+289,9 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) } local->dead = true; - mutex_lock(&rxrpc_local_mutex); + mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); ASSERT(!local->service); @@ -357,7 +360,7 @@ static void rxrpc_local_rcu(struct rcu_head *rcu) /* * Verify the local endpoint list is empty by this point. */ -void __exit rxrpc_destroy_all_locals(void) +void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) { struct rxrpc_local *local; @@ -365,15 +368,13 @@ void __exit rxrpc_destroy_all_locals(void) flush_workqueue(rxrpc_workqueue); - if (!list_empty(&rxrpc_local_endpoints)) { - mutex_lock(&rxrpc_local_mutex); - list_for_each_entry(local, &rxrpc_local_endpoints, link) { + if (!list_empty(&rxnet->local_endpoints)) { + mutex_lock(&rxnet->local_mutex); + list_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", local, atomic_read(&local->usage)); } - mutex_unlock(&rxrpc_local_mutex); + mutex_unlock(&rxnet->local_mutex); BUG(); } - - rcu_barrier(); } diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c new file mode 100644 index 000000000000..7edceb8522f5 --- /dev/null +++ b/net/rxrpc/net_ns.c @@ -0,0 +1,84 @@ +/* rxrpc network namespace handling. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/proc_fs.h> +#include "ar-internal.h" + +unsigned int rxrpc_net_id; + +/* + * Initialise a per-network namespace record. + */ +static __net_init int rxrpc_init_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + int ret; + + get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); + rxnet->epoch |= RXRPC_RANDOM_EPOCH; + + INIT_LIST_HEAD(&rxnet->calls); + rwlock_init(&rxnet->call_lock); + + INIT_LIST_HEAD(&rxnet->conn_proc_list); + INIT_LIST_HEAD(&rxnet->service_conns); + rwlock_init(&rxnet->conn_lock); + INIT_DELAYED_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + + rxnet->nr_client_conns = 0; + rxnet->nr_active_client_conns = 0; + rxnet->kill_all_client_conns = false; + spin_lock_init(&rxnet->client_conn_cache_lock); + spin_lock_init(&rxnet->client_conn_discard_lock); + INIT_LIST_HEAD(&rxnet->waiting_client_conns); + INIT_LIST_HEAD(&rxnet->active_client_conns); + INIT_LIST_HEAD(&rxnet->idle_client_conns); + INIT_DELAYED_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + + INIT_LIST_HEAD(&rxnet->local_endpoints); + mutex_init(&rxnet->local_mutex); + hash_init(rxnet->peer_hash); + spin_lock_init(&rxnet->peer_hash_lock); + + ret = -ENOMEM; + rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); + if (!rxnet->proc_net) + goto err_proc; + + proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops); + proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops); + return 0; + +err_proc: + return ret; +} + +/* + * Clean up a per-network namespace record. 
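rxrpc_init_net() above builds the per-namespace state, and the rxrpc_net_ops table binding it together follows just below; what this section does not show is their registration, which presumably lands in af_rxrpc.c's module init. The standard lifecycle, sketched:

	#include <linux/module.h>
	#include <net/net_namespace.h>

	static int __init demo_module_init(void)
	{
		/* allocates .size bytes per namespace and runs .init for
		 * every existing and future struct net */
		return register_pernet_subsys(&rxrpc_net_ops);
	}

	static void __exit demo_module_exit(void)
	{
		/* runs .exit for each namespace, then frees its state */
		unregister_pernet_subsys(&rxrpc_net_ops);
	}

Thereafter any code holding a struct net reaches its private state through the rxrpc_net() wrapper around net_generic() declared earlier in ar-internal.h.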
+ */ +static __net_exit void rxrpc_exit_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + + rxrpc_destroy_all_calls(rxnet); + rxrpc_destroy_all_connections(rxnet); + rxrpc_destroy_all_locals(rxnet); + proc_remove(rxnet->proc_net); +} + +struct pernet_operations rxrpc_net_ops = { + .init = rxrpc_init_net, + .exit = rxrpc_exit_net, + .id = &rxrpc_net_id, + .size = sizeof(struct rxrpc_net), +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5dab1ff3a6c2..5bd2d0fa4a03 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -292,6 +292,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, whdr._rsvd = htons(sp->hdr._rsvd); whdr.serviceId = htons(call->service_id); + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && + sp->hdr.seq == 1) + whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); iov[1].iov_base = skb->head; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 862eea6b266c..5787f97f5330 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -26,9 +26,6 @@ #include <net/ip6_route.h> #include "ar-internal.h" -static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); -static DEFINE_SPINLOCK(rxrpc_peer_hash_lock); - /* * Hash a peer key. */ @@ -124,8 +121,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( unsigned long hash_key) { struct rxrpc_peer *peer; + struct rxrpc_net *rxnet = local->rxnet; - hash_for_each_possible_rcu(rxrpc_peer_hash, peer, hash_link, hash_key) { + hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { if (atomic_read(&peer->usage) == 0) return NULL; @@ -230,6 +228,13 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + + if (RXRPC_TX_SMSS > 2190) + peer->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + peer->cong_cwnd = 3; + else + peer->cong_cwnd = 4; } _leave(" = %p", peer); @@ -301,13 +306,14 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *prealloc) { struct rxrpc_peer *peer; + struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); prealloc->local = local; rxrpc_init_peer(prealloc, hash_key); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); @@ -315,10 +321,10 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, peer = NULL; if (!peer) { peer = prealloc; - hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); } - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); return peer; } @@ -329,6 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; + struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); _enter("{%pISp}", &srx->transport); @@ -350,17 +357,17 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock_bh(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = 
__rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; if (!peer) - hash_add_rcu(rxrpc_peer_hash, + hash_add_rcu(rxnet->peer_hash, &candidate->hash_link, hash_key); - spin_unlock_bh(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) kfree(candidate); @@ -379,11 +386,13 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, */ void __rxrpc_put_peer(struct rxrpc_peer *peer) { + struct rxrpc_net *rxnet = peer->local->rxnet; + ASSERT(hlist_empty(&peer->error_targets)); - spin_lock_bh(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock_bh(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); kfree_rcu(peer, rcu); } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9bcfbfb095c..7421656963a9 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -30,19 +30,25 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + rcu_read_lock(); - read_lock(&rxrpc_call_lock); - return seq_list_start_head(&rxrpc_calls, *_pos); + read_lock(&rxnet->call_lock); + return seq_list_start_head(&rxnet->calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_calls, pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&rxrpc_call_lock); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->call_lock); rcu_read_unlock(); } @@ -52,10 +58,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_sock *rx; struct rxrpc_peer *peer; struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); rxrpc_seq_t tx_hard_ack, rx_hard_ack; char lbuff[50], rbuff[50]; - if (v == &rxrpc_calls) { + if (v == &rxnet->calls) { seq_puts(seq, "Proto Local " " Remote " @@ -113,7 +120,8 @@ static const struct seq_operations rxrpc_call_seq_ops = { static int rxrpc_call_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &rxrpc_call_seq_ops); + return seq_open_net(inode, file, &rxrpc_call_seq_ops, + sizeof(struct seq_net_private)); } const struct file_operations rxrpc_call_seq_fops = { @@ -129,27 +137,34 @@ const struct file_operations rxrpc_call_seq_fops = { */ static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { - read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_lock(&rxnet->conn_lock); + return seq_list_start_head(&rxnet->conn_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connection_proc_list, pos); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->conn_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&rxrpc_connection_lock); + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->conn_lock); } static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); char lbuff[50], rbuff[50]; - if 
(v == &rxrpc_connection_proc_list) { + if (v == &rxnet->conn_proc_list) { seq_puts(seq, "Proto Local " " Remote " @@ -175,7 +190,7 @@ print: " %s %08x %08x %08x\n", lbuff, rbuff, - conn->params.service_id, + conn->service_id, conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", atomic_read(&conn->usage), @@ -197,7 +212,8 @@ static const struct seq_operations rxrpc_connection_seq_ops = { static int rxrpc_connection_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &rxrpc_connection_seq_ops); + return seq_open_net(inode, file, &rxrpc_connection_seq_ops, + sizeof(struct seq_net_private)); } const struct file_operations rxrpc_connection_seq_fops = { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f9caf3b77509..bdece21f313d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -522,8 +522,11 @@ try_again: } if (msg->msg_name) { - size_t len = sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + struct sockaddr_rxrpc *srx = msg->msg_name; + size_t len = sizeof(call->peer->srx); + + memcpy(msg->msg_name, &call->peer->srx, len); + srx->srx_service = call->service_id; msg->msg_namelen = len; } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 1bb9b2ccc267..46d1a1f0b55b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -227,7 +227,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, len &= ~(call->conn->size_align - 1); sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 0, len); + if (unlikely(err < 0)) + goto out; skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_encrypt(req); @@ -324,7 +326,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, bool aborted; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(""); @@ -342,7 +344,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, offset, 8); + ret = skb_to_sgvec(skb, sg, offset, 8); + if (unlikely(ret < 0)) + return ret; /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -409,7 +413,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, bool aborted; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(",{%d}", skb->len); @@ -434,7 +438,12 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, offset, len); + ret = skb_to_sgvec(skb, sg, offset, len); + if (unlikely(ret < 0)) { + if (sg != _sg) + kfree(sg); + return ret; + } /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -640,7 +649,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) whdr.userStatus = 0; whdr.securityIndex = conn->security_ix; whdr._rsvd = 0; - whdr.serviceId = htons(conn->params.service_id); + whdr.serviceId = htons(conn->service_id); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 7d921e56e715..e9f428351293 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -121,7 +121,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) _enter(""); - sprintf(kdesc, "%u:%u", conn->params.service_id, conn->security_ix); + sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix); sec = rxrpc_security_lookup(conn->security_ix); if (!sec) { @@ -133,7 +133,8 @@ int 
rxrpc_init_server_conn_security(struct rxrpc_connection *conn) read_lock(&local->services_lock); rx = rcu_dereference_protected(local->service, lockdep_is_held(&local->services_lock)); - if (rx && rx->srx.srx_service == conn->params.service_id) + if (rx && (rx->srx.srx_service == conn->service_id || + rx->second_service == conn->service_id)) goto found_service; /* the service appears to have died */ diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 96ffa5d5733b..b0d2cda6ec0a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -28,6 +28,15 @@ enum rxrpc_command { RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ }; +struct rxrpc_send_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked @@ -199,6 +208,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, more = msg->msg_flags & MSG_MORE; + if (call->tx_total_len != -1) { + if (len > call->tx_total_len) + return -EMSGSIZE; + if (!more && len != call->tx_total_len) + return -EMSGSIZE; + } + skb = call->tx_pending; call->tx_pending = NULL; rxrpc_see_skb(skb, rxrpc_skb_tx_seen); @@ -291,6 +307,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->remain -= copy; skb->mark += copy; copied += copy; + if (call->tx_total_len != -1) + call->tx_total_len -= copy; } /* check for the far side aborting the call or a network error @@ -312,7 +330,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, pad &= conn->size_align - 1; _debug("pad %zu", pad); if (pad) - memset(skb_put(skb, pad), 0, pad); + skb_put_zero(skb, pad); } seq = call->tx_top + 1; @@ -362,18 +380,12 @@ efault: /* * extract control messages from the sendmsg() control buffer */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) { struct cmsghdr *cmsg; bool got_user_ID = false; int len; - *command = RXRPC_CMD_SEND_DATA; - if (msg->msg_controllen == 0) return -EINVAL; @@ -393,42 +405,55 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; - *user_call_ID = *(unsigned long *) + p->user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } - _debug("User Call ID %lx", *user_call_ID); got_user_ID = true; break; case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) + if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) + p->command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(p->abort_code)) return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) + p->abort_code = *(unsigned int *)CMSG_DATA(cmsg); + if (p->abort_code == 0) return -EINVAL; break; case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) + if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - *command = RXRPC_CMD_ACCEPT; + p->command = RXRPC_CMD_ACCEPT; if (len != 0) return 
-EINVAL; break; case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; + p->exclusive = true; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_UPGRADE_SERVICE: + p->upgrade = true; if (len != 0) return -EINVAL; break; + + case RXRPC_TX_LENGTH: + if (p->tx_total_len != -1 || len != sizeof(__s64)) + return -EINVAL; + p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->tx_total_len < 0) + return -EINVAL; + break; + default: return -EINVAL; } @@ -436,6 +461,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, if (!got_user_ID) return -EINVAL; + if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + return -EINVAL; _leave(" = 0"); return 0; } @@ -447,7 +474,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) + struct rxrpc_send_params *p) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_conn_parameters cp; @@ -471,9 +498,11 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.local = rx->local; cp.key = rx->key; cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; + cp.exclusive = rx->exclusive | p->exclusive; + cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, + p->tx_total_len, GFP_KERNEL); /* The socket is now unlocked */ _leave(" = %p\n", call); @@ -489,25 +518,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { enum rxrpc_call_state state; - enum rxrpc_command cmd; struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; int ret; + struct rxrpc_send_params p = { + .tx_total_len = -1, + .user_call_ID = 0, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = true, + }; + _enter(""); - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); + ret = rxrpc_sendmsg_cmsg(msg, &p); if (ret < 0) goto error_release_sock; - if (cmd == RXRPC_CMD_ACCEPT) { + if (p.command == RXRPC_CMD_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, user_call_ID, NULL); + call = rxrpc_accept_call(rx, p.user_call_ID, NULL); /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); @@ -515,13 +548,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return 0; } - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); if (!call) { ret = -EBADSLT; - if (cmd != RXRPC_CMD_SEND_DATA) + if (p.command != RXRPC_CMD_SEND_DATA) goto error_release_sock; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); + call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p); /* The socket is now unlocked... 
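Seen from userspace, the two new control messages parsed above ride alongside the mandatory RXRPC_USER_CALL_ID on sendmsg(). A hedged sketch of building them (the UAPI header location varies by kernel version; the data iovec and error handling are elided; the parser insists on exact payload lengths):

	char ctl[CMSG_SPACE(sizeof(unsigned long)) +
		 CMSG_SPACE(sizeof(__s64)) + CMSG_SPACE(0)];
	struct msghdr msg = {
		.msg_control	= ctl,
		.msg_controllen	= sizeof(ctl),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	unsigned long call_id = 1;	/* caller's handle for the call */
	__s64 tx_len = 65536;		/* total bytes this call will send */

	cm->cmsg_level = SOL_RXRPC;
	cm->cmsg_type = RXRPC_USER_CALL_ID;
	cm->cmsg_len = CMSG_LEN(sizeof(call_id));
	memcpy(CMSG_DATA(cm), &call_id, sizeof(call_id));

	cm = CMSG_NXTHDR(&msg, cm);
	cm->cmsg_level = SOL_RXRPC;
	cm->cmsg_type = RXRPC_TX_LENGTH;	/* exactly sizeof(__s64) */
	cm->cmsg_len = CMSG_LEN(sizeof(tx_len));
	memcpy(CMSG_DATA(cm), &tx_len, sizeof(tx_len));

	cm = CMSG_NXTHDR(&msg, cm);
	cm->cmsg_level = SOL_RXRPC;
	cm->cmsg_type = RXRPC_UPGRADE_SERVICE;	/* zero-length flag */
	cm->cmsg_len = CMSG_LEN(0);

Note that RXRPC_TX_LENGTH is rejected unless the command is plain data transmission, and a negative length is -EINVAL, exactly as the cmsg parser above encodes.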
*/ if (IS_ERR(call)) return PTR_ERR(call); @@ -545,6 +577,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -ERESTARTSYS; goto error_put; } + + if (p.tx_total_len != -1) { + ret = -EINVAL; + if (call->tx_total_len != -1 || + call->tx_pending || + call->tx_top != 0) + goto error_put; + call->tx_total_len = p.tx_total_len; + } } state = READ_ONCE(call->state); @@ -554,11 +595,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (state >= RXRPC_CALL_COMPLETE) { /* it's too late for this call */ ret = -ESHUTDOWN; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { + } else if (p.command == RXRPC_CMD_SEND_ABORT) { ret = 0; - if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED)) + if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) ret = rxrpc_send_abort_packet(call); - } else if (cmd != RXRPC_CMD_SEND_DATA) { + } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && state != RXRPC_CALL_CLIENT_SEND_REQUEST) { @@ -662,5 +703,24 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, mutex_unlock(&call->user_mutex); return aborted; } - EXPORT_SYMBOL(rxrpc_kernel_abort_call); + +/** + * rxrpc_kernel_set_tx_length - Set the total Tx length on a call + * @sock: The socket the call is on + * @call: The call to be informed + * @tx_total_len: The amount of data to be transmitted for this call + * + * Allow a kernel service to set the total transmit length on a call. This + * allows buffer-to-packet encrypt-and-copy to be performed. + * + * This function is primarily for use for setting the reply length since the + * request length can be set when beginning the call. + */ +void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call, + s64 tx_total_len) +{ + WARN_ON(call->tx_total_len != -1); + call->tx_total_len = tx_total_len; +} +EXPORT_SYMBOL(rxrpc_kernel_set_tx_length); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 67b02c45271b..b8985d01876a 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } /* @@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n = atomic_read(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } } @@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); skb_get(skb); } @@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) int n; CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); kfree_skb(skb); } } @@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) int n; CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(op)); - 
trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); kfree_skb(skb); } } @@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list) while ((skb = skb_dequeue((list))) != NULL) { int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, - atomic_read(&skb->users), n, here); + refcount_read(&skb->users), n, here); kfree_skb(skb); } } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9fb84f0de6af..e70ed26485a2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -649,6 +649,7 @@ config NET_EMATCH_IPSET config NET_CLS_ACT bool "Actions" + select NET_CLS ---help--- Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a90e8f355c00..aed6cf2e9fd8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -28,6 +28,31 @@ #include <net/act_api.h> #include <net/netlink.h> +static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) +{ + u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; + + if (!tp) + return -EINVAL; + a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true); + if (!a->goto_chain) + return -ENOMEM; + return 0; +} + +static void tcf_action_goto_chain_fini(struct tc_action *a) +{ + tcf_chain_put(a->goto_chain); +} + +static void tcf_action_goto_chain_exec(const struct tc_action *a, + struct tcf_result *res) +{ + const struct tcf_chain *chain = a->goto_chain; + + res->goto_tp = rcu_dereference_bh(chain->filter_chain); +} + static void free_tcf(struct rcu_head *head) { struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); @@ -39,6 +64,8 @@ static void free_tcf(struct rcu_head *head) kfree(p->act_cookie->data); kfree(p->act_cookie); } + if (p->goto_chain) + tcf_action_goto_chain_fini(p); kfree(p); } @@ -465,6 +492,8 @@ repeat: else /* faulty graph, stop pipeline */ return TC_ACT_OK; } + } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + tcf_action_goto_chain_exec(a, res); } if (ret != TC_ACT_PIPE) @@ -570,9 +599,9 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind) +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind) { struct tc_action *a; struct tc_action_ops *a_o; @@ -657,6 +686,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (err != ACT_P_CREATED) module_put(a_o->owner); + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { + err = tcf_action_goto_chain_init(a, tp); + if (err) { + LIST_HEAD(actions); + + list_add_tail(&a->list, &actions); + tcf_action_destroy(&actions, bind); + return ERR_PTR(err); + } + } + return a; err_mod: @@ -680,8 +720,9 @@ static void cleanup_a(struct list_head *actions, int ovr) a->tcfa_refcnt--; } -int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct list_head *actions) +int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, int bind, + struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -693,7 +734,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct 
nlattr *est, return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1020,7 +1061,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, int ret = 0; LIST_HEAD(actions); - ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); if (ret) return ret; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index d33947d6e9d0..9afe1337cfd1 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -123,6 +123,9 @@ static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; + if (nla_put_u32(skb, TCA_ACT_BPF_ID, prog->filter->aux->id)) + return -EMSGSIZE; + nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index ab6fdbd34db7..3317a2f579da 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, sctph->checksum = sctp_compute_cksum(skb, skb_network_offset(skb) + ihl); skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; return 1; } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index b9a2f241a5b3..fd7e75679c69 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -67,6 +67,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, + [TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 }, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -83,6 +84,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, bool exists = false; __be16 dst_port = 0; __be64 key_id; + __be16 flags; int ret = 0; int err; @@ -113,6 +115,11 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + flags = TUNNEL_KEY | TUNNEL_CSUM; + if (tb[TCA_TUNNEL_KEY_NO_CSUM] && + nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) + flags &= ~TUNNEL_CSUM; + if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); @@ -125,7 +132,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, - dst_port, TUNNEL_KEY, + dst_port, flags, key_id, 0); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { @@ -136,7 +143,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port, - 0, TUNNEL_KEY, + 0, flags, key_id, 0); } @@ -266,7 +273,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || tunnel_key_dump_addresses(skb, ¶ms->tcft_enc_metadata->u.tun_info) || - nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) + nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) || + nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, + !(key->tun_flags & TUNNEL_CSUM))) goto nla_put_failure; } diff 
--git a/net/sched/cls_api.c b/net/sched/cls_api.c index 22f88b35a546..39da0c5801c9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -106,13 +106,12 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto __rcu **chain, int event) + struct tcf_chain *chain, int event) { - struct tcf_proto __rcu **it_chain; struct tcf_proto *tp; - for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; - it_chain = &tp->next) + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) tfilter_notify(net, oskb, n, tp, 0, event, false); } @@ -125,11 +124,12 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) if (tp) first = tp->prio - 1; - return first; + return TC_H_MAJ(first); } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, - u32 prio, u32 parent, struct Qdisc *q) + u32 prio, u32 parent, struct Qdisc *q, + struct tcf_chain *chain) { struct tcf_proto *tp; int err; @@ -165,6 +165,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->prio = prio; tp->classid = parent; tp->q = q; + tp->chain = chain; err = tp->ops->init(tp); if (err) { @@ -185,16 +186,226 @@ static void tcf_proto_destroy(struct tcf_proto *tp) kfree_rcu(tp, rcu); } -void tcf_destroy_chain(struct tcf_proto __rcu **fl) +static struct tcf_chain *tcf_chain_create(struct tcf_block *block, + u32 chain_index) +{ + struct tcf_chain *chain; + + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (!chain) + return NULL; + list_add_tail(&chain->list, &block->chain_list); + chain->block = block; + chain->index = chain_index; + chain->refcnt = 1; + return chain; +} + +static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - while ((tp = rtnl_dereference(*fl)) != NULL) { - RCU_INIT_POINTER(*fl, tp->next); + if (*chain->p_filter_chain) + RCU_INIT_POINTER(*chain->p_filter_chain, NULL); + while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { + RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_proto_destroy(tp); } } -EXPORT_SYMBOL(tcf_destroy_chain); + +static void tcf_chain_destroy(struct tcf_chain *chain) +{ + list_del(&chain->list); + tcf_chain_flush(chain); + kfree(chain); +} + +struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create) +{ + struct tcf_chain *chain; + + list_for_each_entry(chain, &block->chain_list, list) { + if (chain->index == chain_index) { + chain->refcnt++; + return chain; + } + } + if (create) + return tcf_chain_create(block, chain_index); + else + return NULL; +} +EXPORT_SYMBOL(tcf_chain_get); + +void tcf_chain_put(struct tcf_chain *chain) +{ + /* Destroy unused chain, with exception of chain 0, which is the + * default one and has to be always present. + */ + if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + tcf_chain_destroy(chain); +} +EXPORT_SYMBOL(tcf_chain_put); + +static void +tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, + struct tcf_proto __rcu **p_filter_chain) +{ + chain->p_filter_chain = p_filter_chain; +} + +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain) +{ + struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); + struct tcf_chain *chain; + int err; + + if (!block) + return -ENOMEM; + INIT_LIST_HEAD(&block->chain_list); + /* Create chain 0 by default, it has to be always present. 
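
/*
 * A sketch (not from the patch) of how the multi-chain pieces compose.
 * With iproute2 of the same era, filters live on numbered chains and jump
 * between them with the new goto-chain action, e.g.:
 *
 *	tc filter add dev eth0 ingress chain 0 \
 *		protocol ip flower ip_proto tcp action goto chain 11
 *	tc filter add dev eth0 ingress chain 11 \
 *		protocol ip flower dst_ip 192.0.2.1 action drop
 *
 * In the kernel the jump is an extended action verdict with the target
 * chain index packed into the low bits; tcf_action_goto_chain_init() in
 * act_api.c earlier unpacks it and pins the chain, and tcf_classify()
 * below restarts matching from res->goto_tp when it sees the verdict.
 * Minimal sketch of the pinning step (foo_ prefix is hypothetical; for
 * "goto chain 11" the verdict would be TC_ACT_GOTO_CHAIN | 11):
 */
static int foo_pin_goto_chain(struct tcf_block *block, u32 verdict,
			      struct tcf_chain **p_chain)
{
	if (!TC_ACT_EXT_CMP(verdict, TC_ACT_GOTO_CHAIN))
		return -EINVAL;

	*p_chain = tcf_chain_get(block, verdict & TC_ACT_EXT_VAL_MASK, true);
	if (!*p_chain)
		return -ENOMEM;
	/* Release later with tcf_chain_put(); a chain is freed once it is
	 * both unreferenced and empty, except for default chain 0, which
	 * always stays.
	 */
	return 0;
}
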
*/ + chain = tcf_chain_create(block, 0); + if (!chain) { + err = -ENOMEM; + goto err_chain_create; + } + tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + *p_block = block; + return 0; + +err_chain_create: + kfree(block); + return err; +} +EXPORT_SYMBOL(tcf_block_get); + +void tcf_block_put(struct tcf_block *block) +{ + struct tcf_chain *chain, *tmp; + + if (!block) + return; + + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_destroy(chain); + kfree(block); +} +EXPORT_SYMBOL(tcf_block_put); + +/* Main classifier routine: scans classifier chain attached + * to this qdisc, (optionally) tests for protocol and asks + * specific classifiers. + */ +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + __be16 protocol = tc_skb_protocol(skb); +#ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; + const struct tcf_proto *orig_tp = tp; + const struct tcf_proto *first_tp; + int limit = 0; + +reclassify: +#endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; + + if (tp->protocol != protocol && + tp->protocol != htons(ETH_P_ALL)) + continue; + + err = tp->classify(skb, tp, res); +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { + first_tp = orig_tp; + goto reset; + } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { + first_tp = res->goto_tp; + goto reset; + } +#endif + if (err >= 0) + return err; + } + + return TC_ACT_UNSPEC; /* signal: continue lookup */ +#ifdef CONFIG_NET_CLS_ACT +reset: + if (unlikely(limit++ >= max_reclassify_loop)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", + tp->q->ops->id, tp->prio & 0xffff, + ntohs(tp->protocol)); + return TC_ACT_SHOT; + } + + tp = first_tp; + protocol = tc_skb_protocol(skb); + goto reclassify; +#endif +} +EXPORT_SYMBOL(tcf_classify); + +struct tcf_chain_info { + struct tcf_proto __rcu **pprev; + struct tcf_proto __rcu *next; +}; + +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +{ + return rtnl_dereference(*chain_info->pprev); +} + +static void tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) +{ + if (chain->p_filter_chain && + *chain_info->pprev == chain->filter_chain) + rcu_assign_pointer(*chain->p_filter_chain, tp); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + rcu_assign_pointer(*chain_info->pprev, tp); +} + +static void tcf_chain_tp_remove(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) +{ + struct tcf_proto *next = rtnl_dereference(chain_info->next); + + if (chain->p_filter_chain && tp == chain->filter_chain) + RCU_INIT_POINTER(*chain->p_filter_chain, next); + RCU_INIT_POINTER(*chain_info->pprev, next); +} + +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate) +{ + struct tcf_proto **pprev; + struct tcf_proto *tp; + + /* Check the chain for existence of proto-tcf with this priority */ + for (pprev = &chain->filter_chain; + (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + if (tp->prio >= prio) { + if (tp->prio == prio) { + if (prio_allocate || + (tp->protocol != protocol && protocol)) + return ERR_PTR(-EINVAL); + } else { + tp = NULL; + } + break; + } + } + chain_info->pprev = pprev; + chain_info->next = tp ? 
tp->next : NULL; + return tp; +} /* Add/change/delete/get a filter node */ @@ -206,13 +417,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 protocol; u32 prio; - u32 nprio; + bool prio_allocate; u32 parent; + u32 chain_index; struct net_device *dev; struct Qdisc *q; - struct tcf_proto __rcu **back; - struct tcf_proto __rcu **chain; - struct tcf_proto *next; + struct tcf_chain_info chain_info; + struct tcf_chain *chain = NULL; + struct tcf_block *block; struct tcf_proto *tp; const struct Qdisc_class_ops *cops; unsigned long cl; @@ -234,7 +446,7 @@ replay: t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); - nprio = prio; + prio_allocate = false; parent = t->tcm_parent; cl = 0; @@ -250,6 +462,7 @@ replay: */ if (n->nlmsg_flags & NLM_F_CREATE) { prio = TC_H_MAKE(0x80000000U, 0U); + prio_allocate = true; break; } /* fall-through */ @@ -280,7 +493,7 @@ replay: if (!cops) return -EINVAL; - if (cops->tcf_chain == NULL) + if (!cops->tcf_block) return -EOPNOTSUPP; /* Do we search for filter, attached to class? */ @@ -291,34 +504,36 @@ replay: } /* And the last stroke */ - chain = cops->tcf_chain(q, cl); - if (chain == NULL) { + block = cops->tcf_block(q, cl); + if (!block) { err = -EINVAL; goto errout; } + + chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + if (chain_index > TC_ACT_EXT_VAL_MASK) { + err = -EINVAL; + goto errout; + } + chain = tcf_chain_get(block, chain_index, + n->nlmsg_type == RTM_NEWTFILTER); + if (!chain) { + err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL; + goto errout; + } + if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); - tcf_destroy_chain(chain); + tcf_chain_flush(chain); err = 0; goto errout; } - /* Check the chain for existence of proto-tcf with this priority */ - for (back = chain; - (tp = rtnl_dereference(*back)) != NULL; - back = &tp->next) { - if (tp->prio >= prio) { - if (tp->prio == prio) { - if (!nprio || - (tp->protocol != protocol && protocol)) { - err = -EINVAL; - goto errout; - } - } else { - tp = NULL; - } - break; - } + tp = tcf_chain_tp_find(chain, &chain_info, protocol, + prio, prio_allocate); + if (IS_ERR(tp)) { + err = PTR_ERR(tp); + goto errout; } if (tp == NULL) { @@ -335,11 +550,11 @@ replay: goto errout; } - if (!nprio) - nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); + if (prio_allocate) + prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, nprio, parent, q); + protocol, prio, parent, q, chain); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; @@ -354,8 +569,7 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - next = rtnl_dereference(tp->next); - RCU_INIT_POINTER(*back, next); + tcf_chain_tp_remove(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); tcf_proto_destroy(tp); @@ -384,11 +598,10 @@ replay: err = tp->ops->delete(tp, fh, &last); if (err) goto errout; - next = rtnl_dereference(tp->next); tfilter_notify(net, skb, n, tp, t->tcm_handle, RTM_DELTFILTER, false); if (last) { - RCU_INIT_POINTER(*back, next); + tcf_chain_tp_remove(chain, &chain_info, tp); tcf_proto_destroy(tp); } goto errout; @@ -405,10 +618,8 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? 
TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { - if (tp_created) { - RCU_INIT_POINTER(tp->next, rtnl_dereference(*back)); - rcu_assign_pointer(*back, tp); - } + if (tp_created) + tcf_chain_tp_insert(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) @@ -416,6 +627,8 @@ replay: } errout: + if (chain) + tcf_chain_put(chain); if (cl) cops->put(q, cl); if (err == -EAGAIN) @@ -444,6 +657,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) + goto nla_put_failure; tcm->tcm_handle = fh; if (RTM_DELTFILTER != event) { tcm->tcm_handle = 0; @@ -500,22 +715,76 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, RTM_NEWTFILTER); } +static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, + struct netlink_callback *cb, + long index_start, long *p_index) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_dump_args arg; + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + if (*p_index < index_start) + continue; + if (TC_H_MAJ(tcm->tcm_info) && + TC_H_MAJ(tcm->tcm_info) != tp->prio) + continue; + if (TC_H_MIN(tcm->tcm_info) && + TC_H_MIN(tcm->tcm_info) != tp->protocol) + continue; + if (*p_index > index_start) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (cb->args[1] == 0) { + if (tcf_fill_node(net, skb, tp, 0, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER) <= 0) + return false; + + cb->args[1] = 1; + } + if (!tp->ops->walk) + continue; + arg.w.fn = tcf_node_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1] - 1; + arg.w.count = 0; + tp->ops->walk(tp, &arg.w); + cb->args[1] = arg.w.count + 1; + if (arg.w.stop) + return false; + } + return true; +} + /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q; - struct tcf_proto *tp, __rcu **chain; + struct tcf_block *block; + struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); unsigned long cl = 0; const struct Qdisc_class_ops *cops; - struct tcf_dump_args arg; + long index_start; + long index; + int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; + + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + if (err) + return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; @@ -529,56 +798,29 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) cops = q->ops->cl_ops; if (!cops) goto errout; - if (cops->tcf_chain == NULL) + if (!cops->tcf_block) goto errout; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->get(q, tcm->tcm_parent); if (cl == 0) goto errout; } - chain = cops->tcf_chain(q, cl); - if (chain == NULL) + block = cops->tcf_block(q, cl); + if (!block) goto errout; - s_t = cb->args[0]; - - for (tp = rtnl_dereference(*chain), t = 0; - tp; tp = rtnl_dereference(tp->next), t++) { - if (t < s_t) - continue; - if (TC_H_MAJ(tcm->tcm_info) && - TC_H_MAJ(tcm->tcm_info) != tp->prio) - continue; - if (TC_H_MIN(tcm->tcm_info) && - TC_H_MIN(tcm->tcm_info) != tp->protocol) - continue; - if (t > 
s_t) - memset(&cb->args[1], 0, - sizeof(cb->args)-sizeof(cb->args[0])); - if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) - break; + index_start = cb->args[0]; + index = 0; - cb->args[1] = 1; - } - if (tp->ops->walk == NULL) + list_for_each_entry(chain, &block->chain_list, list) { + if (tca[TCA_CHAIN] && + nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; - arg.w.fn = tcf_node_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1] - 1; - arg.w.count = 0; - tp->ops->walk(tp, &arg.w); - cb->args[1] = arg.w.count + 1; - if (arg.w.stop) + if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) break; } - cb->args[0] = t; + cb->args[0] = index; errout: if (cl) @@ -608,8 +850,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct tc_action *act; if (exts->police && tb[exts->police]) { - act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, TCA_ACT_BIND); + act = tcf_action_init_1(net, tp, tb[exts->police], + rate_tlv, "police", ovr, + TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -620,8 +863,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, LIST_HEAD(actions); int err, i = 0; - err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, TCA_ACT_BIND, + err = tcf_action_init(net, tp, tb[exts->action], + rate_tlv, NULL, ovr, TCA_ACT_BIND, &actions); if (err) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5ebeae996e63..f57bd531ba98 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code) case TC_ACT_OK: case TC_ACT_SHOT: case TC_ACT_STOLEN: + case TC_ACT_TRAP: case TC_ACT_REDIRECT: case TC_ACT_UNSPEC: return code; @@ -161,6 +162,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.gen_flags = prog->gen_flags; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, &offload); if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) @@ -564,6 +566,9 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; + if (nla_put_u32(skb, TCA_BPF_ID, prog->filter->aux->id)) + return -EMSGSIZE; + nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ca526c0881bd..7832eb93379b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -49,6 +49,8 @@ struct fl_flow_key { }; struct flow_dissector_key_ports enc_tp; struct flow_dissector_key_mpls mpls; + struct flow_dissector_key_tcp tcp; + struct flow_dissector_key_ip ip; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
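
/*
 * All of the new flower keys below (TCP flags, IP TOS/TTL) follow the same
 * value/mask attribute convention as the existing ones.  For reference,
 * the helper they all go through behaves roughly like this; paraphrased
 * from the flower code of this period, since its body is outside these
 * hunks:
 */
static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	if (!tb[val_type])
		return;
	memcpy(val, nla_data(tb[val_type]), len);
	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);	/* no mask => exact match */
	else
		memcpy(mask, nla_data(tb[mask_type]), len);
}
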
*/ struct fl_flow_mask_range { @@ -237,7 +239,8 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, + tp->protocol, tc); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -273,8 +276,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, tc); if (!err) f->flags |= TCA_CLS_FLAGS_IN_HW; @@ -300,7 +303,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tc->type = TC_SETUP_CLSFLOWER; tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, tp->protocol, tc); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) @@ -424,6 +428,12 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -525,6 +535,19 @@ static int fl_set_key_flags(struct nlattr **tb, return 0; } +static void fl_set_key_ip(struct nlattr **tb, + struct flow_dissector_key_ip *key, + struct flow_dissector_key_ip *mask) +{ + fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS, + &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK, + sizeof(key->tos)); + + fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, + &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK, + sizeof(key->ttl)); +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -567,6 +590,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); + fl_set_key_ip(tb, &key->ip, &mask->ip); } if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { @@ -596,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); + fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS, + &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK, + sizeof(key->tcp.flags)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, @@ -767,6 +794,10 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IP, ip); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_TCP, tcp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ICMP, icmp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, 
FLOW_DISSECTOR_KEY_ARP, arp); @@ -1074,6 +1105,19 @@ static int fl_dump_key_mpls(struct sk_buff *skb, return 0; } +static int fl_dump_key_ip(struct sk_buff *skb, + struct flow_dissector_key_ip *key, + struct flow_dissector_key_ip *mask) +{ + if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos, + TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) || + fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl, + TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl))) + return -1; + + return 0; +} + static int fl_dump_key_vlan(struct sk_buff *skb, struct flow_dissector_key_vlan *vlan_key, struct flow_dissector_key_vlan *vlan_mask) @@ -1187,9 +1231,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if ((key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) && - fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.ip_proto))) + sizeof(key->basic.ip_proto)) || + fl_dump_key_ip(skb, &key->ip, &mask->ip))) goto nla_put_failure; if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && @@ -1215,7 +1260,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, - sizeof(key->tp.dst)))) + sizeof(key->tp.dst)) || + fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS, + &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK, + sizeof(key->tcp.flags)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 51859b8edd7e..9dc26c32cf32 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -64,8 +64,9 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->chain->index, + tp->protocol, &offload); if (!err) head->flags |= TCA_CLS_FLAGS_IN_HW; @@ -86,8 +87,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = NULL; offload.cls_mall->cookie = cookie; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, + tp->protocol, &offload); } static void mall_destroy(struct tcf_proto *tp) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d20e72a095d5..2d01195153e6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -441,7 +441,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); } } @@ -465,7 +466,8 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, offload.cls_u32->hnode.prio = h->prio; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); if (tc_skip_sw(flags)) return err; @@ -488,7 +490,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) 
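
/*
 * Driver-facing consequence of the tp->chain->index plumbing in these
 * classifier hunks: ndo_setup_tc() grows a chain_index argument.  A
 * hypothetical driver that only offloads the default chain would reject
 * everything else, along these lines:
 */
static int foo_ndo_setup_tc(struct net_device *dev, u32 handle,
			    u32 chain_index, __be16 protocol,
			    struct tc_to_netdev *tc)
{
	if (chain_index)
		return -EOPNOTSUPP;	/* only chain 0 in hardware */
	/* ... dispatch on tc->type as before ... */
	return 0;
}
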
offload.cls_u32->hnode.prio = h->prio; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); } } @@ -522,7 +525,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, offload.cls_u32->knode.link_handle = n->ht_down->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + tp->chain->index, tp->protocol, + &offload); if (!err) n->flags |= TCA_CLS_FLAGS_IN_HW; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index eb0e9bab54c1..d6e97115500b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt) *err = -1; return; } - dst->value = atomic_read(&skb->sk->sk_refcnt); + dst->value = refcount_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cfdbfa18a95e..bd24a550e0f9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops) if (!(cops->get && cops->put && cops->walk && cops->leaf)) goto out_einval; - if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf)) + if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) goto out_einval; } @@ -839,7 +839,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) - atomic_inc(&new->refcnt); + refcount_inc(&new->refcnt); if (!ingress) qdisc_destroy(old); @@ -850,7 +850,7 @@ skip: notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) - atomic_inc(&new->refcnt); + refcount_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; if (new && new->ops->attach) @@ -1259,7 +1259,7 @@ replay: if (q == p || (p && check_loop(q, p, 0))) return -ELOOP; - atomic_inc(&q->refcnt); + refcount_inc(&q->refcnt); goto graft; } else { if (!q) @@ -1374,7 +1374,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; - tcm->tcm_info = atomic_read(&q->refcnt); + tcm->tcm_info = refcount_read(&q->refcnt); if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; if (q->ops->dump && q->ops->dump(q, skb) < 0) @@ -1879,54 +1879,6 @@ done: return skb->len; } -/* Main classifier routine: scans classifier chain attached - * to this qdisc, (optionally) tests for protocol and asks - * specific classifiers. 
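
/*
 * register_qdisc() above now keys its bind_tcf/unbind_tcf sanity check on
 * .tcf_block instead of .tcf_chain, and every classful qdisc in the rest
 * of this merge converts along the same lines.  A minimal sketch of the
 * recurring pattern (foo_ names are hypothetical):
 */
struct foo_sched_data {
	struct tcf_proto __rcu *filter_list;	/* fast path, chain 0 */
	struct tcf_block *block;		/* control path */
};

static int foo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	return tcf_block_get(&q->block, &q->filter_list);
}

static void foo_destroy(struct Qdisc *sch)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);	/* replaces tcf_destroy_chain() */
}

static struct tcf_block *foo_tcf_block(struct Qdisc *sch, unsigned long cl)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	return q->block;	/* the .tcf_block op replaces .tcf_chain */
}
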
- */ -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) -{ - __be16 protocol = tc_skb_protocol(skb); -#ifdef CONFIG_NET_CLS_ACT - const int max_reclassify_loop = 4; - const struct tcf_proto *old_tp = tp; - int limit = 0; - -reclassify: -#endif - for (; tp; tp = rcu_dereference_bh(tp->next)) { - int err; - - if (tp->protocol != protocol && - tp->protocol != htons(ETH_P_ALL)) - continue; - - err = tp->classify(skb, tp, res); -#ifdef CONFIG_NET_CLS_ACT - if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) - goto reset; -#endif - if (err >= 0) - return err; - } - - return TC_ACT_UNSPEC; /* signal: continue lookup */ -#ifdef CONFIG_NET_CLS_ACT -reset: - if (unlikely(limit++ >= max_reclassify_loop)) { - net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", - tp->q->ops->id, tp->prio & 0xffff, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - - tp = old_tp; - protocol = tc_skb_protocol(skb); - goto reclassify; -#endif -} -EXPORT_SYMBOL(tc_classify); - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 40cbceed4de8..572fe2584e48 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -43,6 +43,7 @@ struct atm_flow_data { struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); /* chaining */ @@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(&flow->filter_list); + tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); @@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -ENOBUFS; goto err_out; } - RCU_INIT_POINTER(flow->filter_list, NULL); + + error = tcf_block_get(&flow->block, &flow->filter_list); + if (error) { + kfree(flow); + goto err_out; + } + flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; @@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? &flow->filter_list : &p->link.filter_list; + return flow ? 
flow->block : p->link.block; } /* --------------------------- Qdisc operations ---------------------------- */ @@ -377,7 +383,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tc_classify(skb, fl, &res, true); + result = tcf_classify(skb, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; @@ -400,6 +406,7 @@ done: switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: @@ -491,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data) ATM_SKB(skb)->vcc = flow->vcc; memcpy(skb_push(skb, flow->hdr_len), flow->hdr, flow->hdr_len); - atomic_add(skb->truesize, + refcount_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ flow->vcc->send(flow->vcc, skb); @@ -524,6 +531,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) { struct atm_qdisc_data *p = qdisc_priv(sch); + int err; pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); @@ -534,7 +542,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - RCU_INIT_POINTER(p->link.filter_list, NULL); + + err = tcf_block_get(&p->link.block, &p->link.filter_list); + if (err) + return err; + p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; @@ -561,7 +573,7 @@ static void atm_tc_destroy(struct Qdisc *sch) pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) - tcf_destroy_chain(&flow->filter_list); + tcf_block_put(flow->block); list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) @@ -646,7 +658,7 @@ static const struct Qdisc_class_ops atm_class_ops = { .change = atm_tc_change, .delete = atm_tc_delete, .walk = atm_tc_walk, - .tcf_chain = atm_tc_find_tcf, + .tcf_block = atm_tc_tcf_block, .bind_tcf = atm_tc_bind_filter, .unbind_tcf = atm_tc_put, .dump = atm_tc_dump_class, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 7415859fd4c3..481036f6b54e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -127,6 +127,7 @@ struct cbq_class { struct tc_cbq_xstats xstats; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; int refcnt; int filters; @@ -233,7 +234,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. 
*/ - result = tc_classify(skb, fl, &res, true); + result = tcf_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; @@ -253,6 +254,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1405,7 +1407,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) WARN_ON(cl->filters); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); @@ -1430,7 +1432,7 @@ static void cbq_destroy(struct Qdisc *sch) */ for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], @@ -1585,12 +1587,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + return err; + } + if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + tcf_block_put(cl->block); kfree(cl); goto failure; } @@ -1688,8 +1697,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) return 0; } -static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch, - unsigned long arg) +static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; @@ -1697,7 +1705,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch, if (cl == NULL) cl = &q->link; - return &cl->filter_list; + return cl->block; } static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, @@ -1756,7 +1764,7 @@ static const struct Qdisc_class_ops cbq_class_ops = { .change = cbq_change_class, .delete = cbq_delete, .walk = cbq_walk, - .tcf_chain = cbq_find_tcf, + .tcf_block = cbq_tcf_block, .bind_tcf = cbq_bind_filter, .unbind_tcf = cbq_unbind_filter, .dump = cbq_dump_class, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 58a8c32eab23..a413dc1c2098 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -36,6 +36,7 @@ struct drr_class { struct drr_sched { struct list_head active; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc_class_hash clhash; }; @@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg) drr_destroy_class(sch, cl); } -static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) { struct drr_sched *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, @@ -333,12 +333,13 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: 
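		/*
		 * Background on the TC_ACT_TRAP cases added throughout these
		 * classify functions (summary, not from this hunk): in the
		 * software datapath TRAP is handled like STOLEN, but for
		 * hardware-offloaded rules it tells the driver to punt
		 * matching packets to the kernel instead of processing them
		 * in the ASIC, e.g. (iproute2 of the same era, assumed):
		 *
		 *	tc filter add dev sw1p1 ingress \
		 *		protocol ip flower ip_proto tcp action trap
		 */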
return NULL; @@ -431,6 +432,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct drr_sched *q = qdisc_priv(sch); int err; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; @@ -462,7 +466,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch) struct hlist_node *next; unsigned int i; - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -477,7 +481,7 @@ static const struct Qdisc_class_ops drr_class_ops = { .delete = drr_delete_class, .get = drr_get_class, .put = drr_put_class, - .tcf_chain = drr_tcf_chain, + .tcf_block = drr_tcf_block, .bind_tcf = drr_bind_tcf, .unbind_tcf = drr_unbind_tcf, .graft = drr_graft_class, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1c0f877f673a..6d94fcc3592a 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -44,6 +44,7 @@ struct mask_value { struct dsmark_qdisc_data { struct Qdisc *q; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct mask_value *mv; u16 indices; u8 set_tc_index; @@ -183,11 +184,11 @@ ignore: } } -static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - return &p->filter_list; + + return p->block; } /* --------------------------- Qdisc operations ---------------------------- */ @@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tc_classify(skb, fl, &res, false); + int result = tcf_classify(skb, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); @@ -242,6 +243,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, #ifdef CONFIG_NET_CLS_ACT case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; @@ -342,6 +344,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; + err = tcf_block_get(&p->block, &p->filter_list); + if (err) + return err; + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -400,7 +406,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - tcf_destroy_chain(&p->filter_list); + tcf_block_put(p->block); qdisc_destroy(p->q); if (p->mv != p->embedded) kfree(p->mv); @@ -468,7 +474,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = { .change = dsmark_change, .delete = dsmark_delete, .walk = dsmark_walk, - .tcf_chain = dsmark_find_tcf, + .tcf_block = dsmark_tcf_block, .bind_tcf = dsmark_bind_filter, .unbind_tcf = dsmark_put, .dump = dsmark_dump_class, diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index b488721a0059..147fde73a0f5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stat_tcp_retrans++; qdisc_qstats_backlog_inc(sch, skb); if (fq_flow_is_detached(f)) { + struct sock *sk = skb->sk; + fq_flow_add_tail(&q->new_flows, f); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); + if (sk && q->rate_enable) { + if 
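	/*
	 * On the sch_fq hunk here: this is the qdisc half of this cycle's
	 * internal TCP pacing work.  The first time fq sees a flow for a
	 * socket it claims pacing by publishing SK_PACING_FQ through
	 * sk->sk_pacing_status with store-release; the TCP side reads the
	 * status with load-acquire to decide whether it must pace
	 * transmissions itself or can leave pacing to fq.
	 */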
(unlikely(smp_load_acquire(&sk->sk_pacing_status) != + SK_PACING_FQ)) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + } q->inactive_flows--; } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9201abce928c..337f2d6d81e4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -55,6 +55,7 @@ struct fq_codel_flow { struct fq_codel_sched_data { struct tcf_proto __rcu *filter_list; /* optional external classifier */ + struct tcf_block *block; struct fq_codel_flow *flows; /* Flows table [flows_cnt] */ u32 *backlogs; /* backlog table [flows_cnt] */ u32 flows_cnt; /* number of flows */ @@ -96,12 +97,13 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, filter, &res, false); + result = tcf_classify(skb, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; @@ -450,7 +452,7 @@ static void fq_codel_destroy(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); kvfree(q->backlogs); kvfree(q->flows); } @@ -459,6 +461,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) { struct fq_codel_sched_data *q = qdisc_priv(sch); int i; + int err; sch->limit = 10*1024; q->flows_cnt = 1024; @@ -478,6 +481,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) return err; } + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + if (!q->flows) { q->flows = kvzalloc(q->flows_cnt * sizeof(struct fq_codel_flow), GFP_KERNEL); @@ -589,14 +596,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl) { struct fq_codel_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl, @@ -679,7 +685,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = { .leaf = fq_codel_leaf, .get = fq_codel_get, .put = fq_codel_put, - .tcf_chain = fq_codel_find_tcf, + .tcf_block = fq_codel_tcf_block, .bind_tcf = fq_codel_bind, .unbind_tcf = fq_codel_put, .dump = fq_codel_dump_class, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 52a2c55f6d9e..57ba406f1437 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -633,7 +633,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; dev_hold(dev); - atomic_set(&sch->refcnt, 1); + refcount_set(&sch->refcnt, 1); return sch; errout: @@ -701,7 +701,7 @@ void qdisc_destroy(struct Qdisc *qdisc) const struct Qdisc_ops *ops = qdisc->ops; if (qdisc->flags & TCQ_F_BUILTIN || - !atomic_dec_and_test(&qdisc->refcnt)) + !refcount_dec_and_test(&qdisc->refcnt)) return; #ifdef CONFIG_NET_SCHED @@ -739,7 +739,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, spin_lock_bh(root_lock); /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1) qdisc_reset(oqdisc); /* ... 
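/*
 * The atomic_t -> refcount_t switches in this area (Qdisc::refcnt here,
 * sk_buff::users and sock::sk_refcnt earlier) are mechanical:
 *
 *	atomic_set(&sch->refcnt, 1)	->  refcount_set(&sch->refcnt, 1)
 *	atomic_inc(&q->refcnt)		->  refcount_inc(&q->refcnt)
 *	atomic_dec_and_test(&q->refcnt)	->  refcount_dec_and_test(&q->refcnt)
 *	atomic_read(&skb->users)	->  refcount_read(&skb->users)
 *
 * The point of refcount_t is that it saturates and warns on increment-
 * from-zero and on overflow instead of silently wrapping into a potential
 * use-after-free.
 */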
and graft new one */ @@ -785,7 +785,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); dev->qdisc = txq->qdisc_sleeping; - atomic_inc(&dev->qdisc->refcnt); + refcount_inc(&dev->qdisc->refcnt); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5cb82f6c1b06..b52f74610dc7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,6 +116,7 @@ struct hfsc_class { struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ + struct tcf_block *block; unsigned int filter_cnt; /* filter count */ unsigned int level; /* class level in hierarchy */ @@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + return err; + } + if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + tcf_block_put(cl->block); kfree(cl); return err; } @@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) @@ -1142,11 +1150,12 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1261,8 +1270,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) cl->filter_cnt--; } -static struct tcf_proto __rcu ** -hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; @@ -1270,7 +1278,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) if (cl == NULL) cl = &q->root; - return &cl->filter_list; + return cl->block; } static int @@ -1515,7 +1523,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1662,7 +1670,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = { .put = hfsc_put_class, .bind_tcf = hfsc_bind_tcf, .unbind_tcf = hfsc_unbind_tcf, - .tcf_chain = hfsc_tcf_chain, + .tcf_block = hfsc_tcf_block, .dump = hfsc_dump_class, .dump_stats = hfsc_dump_class_stats, .walk = hfsc_walk diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 570ef3b0c09b..203286ab4427 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -105,6 +105,7 @@ struct htb_class { int quantum; /* but stored for parent-to-leaf return */ struct tcf_proto __rcu *filter_list; /* class attached filters */ + struct tcf_block *block; int filter_cnt; int refcnt; 
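	/*
	 * Why htb here (and cbq/hfsc above) keeps the old __rcu filter_list
	 * next to the new block pointer: tcf_block_get() wires chain 0 back
	 * to that pointer through chain->p_filter_chain, so the hot path is
	 * untouched and still reads, e.g.:
	 *
	 *	fl = rcu_dereference_bh(q->filter_list);
	 *	result = tcf_classify(skb, fl, &res, false);
	 *
	 * while the control path manages filters through the block/chain
	 * API; tcf_chain_tp_insert()/tcf_chain_tp_remove() in cls_api.c
	 * update *p_filter_chain whenever the head of chain 0 changes.
	 */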
/* usage count of this class */ @@ -156,6 +157,7 @@ struct htb_sched { /* filters for qdisc itself */ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; #define HTB_WARN_TOOMANYEVENTS 0x1 unsigned int warned; /* only one warning */ @@ -231,11 +233,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1017,6 +1020,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) return err; @@ -1230,7 +1237,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->rate_est); - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); kfree(cl); } @@ -1248,11 +1255,11 @@ static void htb_destroy(struct Qdisc *sch) * because filter need its target class alive to be able to call * unbind_filter on it (without Oops). */ - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) - tcf_destroy_chain(&cl->filter_list); + tcf_block_put(cl->block); } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1396,6 +1403,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; + err = tcf_block_get(&cl->block, &cl->filter_list); + if (err) { + kfree(cl); + goto failure; + } if (htb_rate_est || tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, @@ -1403,6 +1415,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); if (err) { + tcf_block_put(cl->block); kfree(cl); goto failure; } @@ -1521,14 +1534,12 @@ failure: return err; } -static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch, - unsigned long arg) +static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list; - return fl; + return cl ? 
cl->block : q->block; } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, @@ -1591,7 +1602,7 @@ static const struct Qdisc_class_ops htb_class_ops = { .change = htb_change_class, .delete = htb_delete, .walk = htb_walk, - .tcf_chain = htb_find_tcf, + .tcf_block = htb_tcf_block, .bind_tcf = htb_bind_filter, .unbind_tcf = htb_unbind_filter, .dump = htb_dump_class, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 3bab5f66c392..d8a9bebcab90 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -18,6 +18,10 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> +struct ingress_sched_data { + struct tcf_block *block; +}; + static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { } -static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) { - struct net_device *dev = qdisc_dev(sch); + struct ingress_sched_data *q = qdisc_priv(sch); - return &dev->ingress_cl_list; + return q->block; } static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { + struct ingress_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err; + + err = tcf_block_get(&q->block, &dev->ingress_cl_list); + if (err) + return err; + net_inc_ingress_queue(); sch->flags |= TCQ_F_CPUSTATS; @@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); + struct ingress_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&dev->ingress_cl_list); + tcf_block_put(q->block); net_dec_ingress_queue(); } @@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .get = ingress_get, .put = ingress_put, .walk = ingress_walk, - .tcf_chain = ingress_find_tcf, + .tcf_block = ingress_tcf_block, .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, @@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", + .priv_size = sizeof(struct ingress_sched_data), .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .owner = THIS_MODULE, }; +struct clsact_sched_data { + struct tcf_block *ingress_block; + struct tcf_block *egress_block; +}; + static unsigned long clsact_get(struct Qdisc *sch, u32 classid) { switch (TC_H_MIN(classid)) { @@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch, return clsact_get(sch, classid); } -static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) { - struct net_device *dev = qdisc_dev(sch); + struct clsact_sched_data *q = qdisc_priv(sch); switch (cl) { case TC_H_MIN(TC_H_MIN_INGRESS): - return &dev->ingress_cl_list; + return q->ingress_block; case TC_H_MIN(TC_H_MIN_EGRESS): - return &dev->egress_cl_list; + return q->egress_block; default: return NULL; } @@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, static int clsact_init(struct Qdisc *sch, struct nlattr *opt) { + struct clsact_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err; + + err = tcf_block_get(&q->ingress_block, 
&dev->ingress_cl_list); + if (err) + return err; + + err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); + if (err) + return err; + net_inc_ingress_queue(); net_inc_egress_queue(); @@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) static void clsact_destroy(struct Qdisc *sch) { - struct net_device *dev = qdisc_dev(sch); + struct clsact_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&dev->ingress_cl_list); - tcf_destroy_chain(&dev->egress_cl_list); + tcf_block_put(q->egress_block); + tcf_block_put(q->ingress_block); net_dec_ingress_queue(); net_dec_egress_queue(); @@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .get = clsact_get, .put = ingress_put, .walk = ingress_walk, - .tcf_chain = clsact_find_tcf, + .tcf_block = clsact_tcf_block, .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, @@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", + .priv_size = sizeof(struct clsact_sched_data), .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 0a4cf27ea54b..e0c02725cd48 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -43,7 +43,7 @@ static void mqprio_destroy(struct Qdisc *sch) struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, { .mqprio = &offload } }; - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc); } else { netdev_set_num_tc(dev, 0); } @@ -152,7 +152,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, { .mqprio = &offload } }; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, + 0, 0, &tc); if (err) return err; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 43a3a10b3c81..f143b7bbaa0d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -32,6 +32,7 @@ struct multiq_sched_data { u16 max_bands; u16 curband; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc **queues; }; @@ -46,11 +47,12 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tc_classify(skb, fl, &res, false); + err = tcf_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -170,7 +172,7 @@ multiq_destroy(struct Qdisc *sch) int band; struct multiq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (band = 0; band < q->bands; band++) qdisc_destroy(q->queues[band]); @@ -243,6 +245,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + q->max_bands = qdisc_dev(sch)->num_tx_queues; q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); @@ -367,14 +373,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct 
multiq_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static const struct Qdisc_class_ops multiq_class_ops = { @@ -383,7 +388,7 @@ static const struct Qdisc_class_ops multiq_class_ops = { .get = multiq_get, .put = multiq_put, .walk = multiq_walk, - .tcf_chain = multiq_find_tcf, + .tcf_block = multiq_tcf_block, .bind_tcf = multiq_bind, .unbind_tcf = multiq_put, .dump = multiq_dump_class, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 92c2e6d448d7..e3e364cc9a70 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -25,6 +25,7 @@ struct prio_sched_data { int bands; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; }; @@ -42,11 +43,12 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tc_classify(skb, fl, &res, false); + err = tcf_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -145,7 +147,7 @@ prio_destroy(struct Qdisc *sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -204,9 +206,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) static int prio_init(struct Qdisc *sch, struct nlattr *opt) { + struct prio_sched_data *q = qdisc_priv(sch); + int err; + if (!opt) return -EINVAL; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + return prio_tune(sch, opt); } @@ -317,14 +326,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) { struct prio_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static const struct Qdisc_class_ops prio_class_ops = { @@ -333,7 +341,7 @@ static const struct Qdisc_class_ops prio_class_ops = { .get = prio_get, .put = prio_put, .walk = prio_walk, - .tcf_chain = prio_find_tcf, + .tcf_block = prio_tcf_block, .bind_tcf = prio_bind, .unbind_tcf = prio_put, .dump = prio_dump_class, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 041eba3006cc..0e16dfda0bd7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -182,6 +182,7 @@ struct qfq_group { struct qfq_sched { struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc_class_hash clhash; u64 oldV, V; /* Precise virtual times. 
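The conversion repeating across sch_multiq, sch_prio, sch_qfq, sch_sfb and sch_sfq is always the same few steps: keep the existing filter_list pointer, let tcf_block_get() attach a block to it in ->init, hand the block out through the new .tcf_block class op, and release it with tcf_block_put() where tcf_destroy_chain() used to be called. A minimal sketch of the pattern for a hypothetical qdisc, using only the two-argument tcf_block_get() this series introduces (the foo_ names are illustrative, not from the patch):

    #include <net/netlink.h>
    #include <net/pkt_cls.h>
    #include <net/sch_generic.h>

    struct foo_sched_data {
            struct tcf_proto __rcu *filter_list;
            struct tcf_block *block;        /* wraps filter_list */
    };

    static int foo_init(struct Qdisc *sch, struct nlattr *opt)
    {
            struct foo_sched_data *q = qdisc_priv(sch);

            /* The chain still lives in q->filter_list; the block manages it. */
            return tcf_block_get(&q->block, &q->filter_list);
    }

    static void foo_destroy(struct Qdisc *sch)
    {
            struct foo_sched_data *q = qdisc_priv(sch);

            /* Replaces the old tcf_destroy_chain(&q->filter_list). */
            tcf_block_put(q->block);
    }

    static struct tcf_block *foo_tcf_block(struct Qdisc *sch, unsigned long cl)
    {
            struct foo_sched_data *q = qdisc_priv(sch);

            return cl ? NULL : q->block;    /* classless: no per-class blocks */
    }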
*/ @@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg) qfq_destroy_class(sch, cl); } -static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct qfq_sched *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, @@ -720,12 +720,13 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; @@ -1438,6 +1439,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; @@ -1492,7 +1497,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) struct hlist_node *next; unsigned int i; - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1508,7 +1513,7 @@ static const struct Qdisc_class_ops qfq_class_ops = { .delete = qfq_delete_class, .get = qfq_get_class, .put = qfq_put_class, - .tcf_chain = qfq_tcf_chain, + .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, .graft = qfq_graft_class, diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0f777273ba29..11fb6ec878d6 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -56,6 +56,7 @@ struct sfb_bins { struct sfb_sched_data { struct Qdisc *qdisc; struct tcf_proto __rcu *filter_list; + struct tcf_block *block; unsigned long rehash_interval; unsigned long warmup_time; /* double buffering warmup time in jiffies */ u32 max; @@ -259,12 +260,13 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return false; @@ -465,7 +467,7 @@ static void sfb_destroy(struct Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); qdisc_destroy(q->qdisc); } @@ -549,6 +551,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) static int sfb_init(struct Qdisc *sch, struct nlattr *opt) { struct sfb_sched_data *q = qdisc_priv(sch); + int err; + + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; q->qdisc = &noop_qdisc; return sfb_change(sch, opt); @@ -657,14 +664,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl) { struct sfb_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long 
sfb_bind(struct Qdisc *sch, unsigned long parent, @@ -682,7 +688,7 @@ static const struct Qdisc_class_ops sfb_class_ops = { .change = sfb_change_class, .delete = sfb_delete, .walk = sfb_walk, - .tcf_chain = sfb_find_tcf, + .tcf_block = sfb_tcf_block, .bind_tcf = sfb_bind, .unbind_tcf = sfb_put, .dump = sfb_dump_class, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 332d94be6e1c..f80ea2cc5f1f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -126,6 +126,7 @@ struct sfq_sched_data { u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct tcf_proto __rcu *filter_list; + struct tcf_block *block; sfq_index *ht; /* Hash table ('divisor' slots) */ struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ @@ -180,12 +181,13 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, fl, &res, false); + result = tcf_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; @@ -697,7 +699,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); sfq_free(q->ht); @@ -709,6 +711,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); int i; + int err; + + err = tcf_block_get(&q->block, &q->filter_list); + if (err) + return err; setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, (unsigned long)sch); @@ -815,14 +822,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch, - unsigned long cl) +static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl) { struct sfq_sched_data *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static int sfq_dump_class(struct Qdisc *sch, unsigned long cl, @@ -878,7 +884,7 @@ static const struct Qdisc_class_ops sfq_class_ops = { .leaf = sfq_leaf, .get = sfq_get, .put = sfq_put, - .tcf_chain = sfq_find_tcf, + .tcf_block = sfq_tcf_block, .bind_tcf = sfq_bind, .unbind_tcf = sfq_put, .dump = sfq_dump_class, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 95238284c422..40ec83679d6e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct net *net = sock_net(sk); struct sctp_sock *sp; - sctp_paramhdr_t *p; + struct sctp_paramhdr *p; int i; /* Retrieve the SCTP per socket area. */ @@ -88,7 +88,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->base.type = SCTP_EP_TYPE_ASSOCIATION; /* Initialize the object handling fields. */ - atomic_set(&asoc->base.refcnt, 1); + refcount_set(&asoc->base.refcnt, 1); /* Initialize the bind addr area. 
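The atomic_t to refcount_t conversions here and below are textually mechanical, but the semantics tighten: refcount_t saturates instead of wrapping, and it WARNs on increment-from-zero and on underflow, turning reference-count bugs from silent use-after-free into loud diagnostics. The hold/put idiom itself is unchanged; sketched generically (struct obj is illustrative):

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct obj {
            refcount_t refcnt;
    };

    static struct obj *obj_new(gfp_t gfp)
    {
            struct obj *o = kzalloc(sizeof(*o), gfp);

            if (o)
                    refcount_set(&o->refcnt, 1);    /* creator's reference */
            return o;
    }

    static void obj_hold(struct obj *o)
    {
            refcount_inc(&o->refcnt);       /* WARNs if refcnt was already 0 */
    }

    static void obj_put(struct obj *o)
    {
            /* Exactly one caller sees "true" and frees; no wraparound. */
            if (refcount_dec_and_test(&o->refcnt))
                    kfree(o);
    }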
*/ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -246,7 +246,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_new(asoc, gfp)) + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, + 0, gfp)) goto fail_init; /* Assume that peer would support both address types unless we are @@ -283,15 +284,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a ntohs(ep->auth_chunk_list->param_hdr.length)); /* Get the AUTH random number for this association */ - p = (sctp_paramhdr_t *)asoc->c.auth_random; + p = (struct sctp_paramhdr *)asoc->c.auth_random; p->type = SCTP_PARAM_RANDOM; - p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH); + p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH); get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH); return asoc; stream_free: - sctp_stream_free(asoc->stream); + sctp_stream_free(&asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); @@ -365,7 +366,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_tsnmap_free(&asoc->peer.tsn_map); /* Free stream information. */ - sctp_stream_free(asoc->stream); + sctp_stream_free(&asoc->stream); if (asoc->strreset_chunk) sctp_chunk_free(asoc->strreset_chunk); @@ -872,7 +873,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Hold a reference to an association. */ void sctp_association_hold(struct sctp_association *asoc) { - atomic_inc(&asoc->base.refcnt); + refcount_inc(&asoc->base.refcnt); } /* Release a reference to an association and cleanup @@ -880,7 +881,7 @@ void sctp_association_hold(struct sctp_association *asoc) */ void sctp_association_put(struct sctp_association *asoc) { - if (atomic_dec_and_test(&asoc->base.refcnt)) + if (refcount_dec_and_test(&asoc->base.refcnt)) sctp_association_destroy(asoc); } @@ -1111,8 +1112,8 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk) } /* Update an association (possibly from unexpected COOKIE-ECHO processing). */ -void sctp_assoc_update(struct sctp_association *asoc, - struct sctp_association *new) +int sctp_assoc_update(struct sctp_association *asoc, + struct sctp_association *new) { struct sctp_transport *trans; struct list_head *pos, *temp; @@ -1123,8 +1124,10 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.sack_needed = new->peer.sack_needed; asoc->peer.auth_capable = new->peer.auth_capable; asoc->peer.i = new->peer.i; - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, - asoc->peer.i.initial_tsn, GFP_ATOMIC); + + if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, GFP_ATOMIC)) + return -ENOMEM; /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { @@ -1151,7 +1154,7 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Reinitialize SSN for both local streams * and peer's streams. */ - sctp_stream_clear(asoc->stream); + sctp_stream_clear(&asoc->stream); /* Flush the ULP reassembly and ordered queue. * Any data there will now be stale and will @@ -1168,27 +1171,21 @@ void sctp_assoc_update(struct sctp_association *asoc, } else { /* Add any peer addresses from the new association. 
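The substantive change in this sctp_assoc_update() hunk is the switch from void to int: tsnmap setup, peer-address addition, assoc-id allocation and the active-key rebuild can all fail under memory pressure, and each now returns early instead of continuing with a half-updated association. Reduced to the generic void-to-int conversion pattern (ctx and the step helpers are purely illustrative; the real helpers mix pointer and int returns):

    #include <linux/errno.h>

    struct ctx { int a; };                          /* illustrative */
    static int step1(struct ctx *c) { return 0; }   /* 0 = ok, like the real code */
    static int step2(struct ctx *c) { return 0; }

    /* before: void update(struct ctx *c) { step1(c); step2(c); } */
    static int update(struct ctx *c)
    {
            int err = step1(c);

            if (err)
                    return err;     /* e.g. -ENOMEM; the caller aborts */
            return step2(c);        /* last step's error code is the result */
    }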
*/ list_for_each_entry(trans, &new->peer.transport_addr_list, - transports) { - if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) - sctp_assoc_add_peer(asoc, &trans->ipaddr, - GFP_ATOMIC, trans->state); - } + transports) + if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) && + !sctp_assoc_add_peer(asoc, &trans->ipaddr, + GFP_ATOMIC, trans->state)) + return -ENOMEM; asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (sctp_state(asoc, COOKIE_WAIT)) { - sctp_stream_free(asoc->stream); - asoc->stream = new->stream; - new->stream = NULL; - } + if (sctp_state(asoc, COOKIE_WAIT)) + sctp_stream_update(&asoc->stream, &new->stream); - if (!asoc->assoc_id) { - /* get a new association id since we don't have one - * yet. - */ - sctp_assoc_set_id(asoc, GFP_ATOMIC); - } + /* get a new assoc id if we don't have one yet. */ + if (sctp_assoc_set_id(asoc, GFP_ATOMIC)) + return -ENOMEM; } /* SCTP-AUTH: Save the peer parameters from the new associations @@ -1206,7 +1203,7 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); + return sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } /* Update the retran path for sending a retransmitted packet. diff --git a/net/sctp/auth.c b/net/sctp/auth.c index f99d4855d3de..e001b01b0e68 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -63,7 +63,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key) if (!key) return; - if (atomic_dec_and_test(&key->refcnt)) { + if (refcount_dec_and_test(&key->refcnt)) { kzfree(key); SCTP_DBG_OBJCNT_DEC(keys); } @@ -84,7 +84,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) return NULL; key->len = key_len; - atomic_set(&key->refcnt, 1); + refcount_set(&key->refcnt, 1); SCTP_DBG_OBJCNT_INC(keys); return key; @@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) if (!hmacs) return NULL; - n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); @@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, return 0; hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs; - n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id); } @@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, if (asoc->default_hmac_id) return; - n_params = (ntohs(hmacs->param_hdr.length) - - sizeof(sctp_paramhdr_t)) >> 1; + n_params = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; ep = asoc->ep; for (i = 0; i < n_params; i++) { id = ntohs(hmacs->hmac_ids[i]); @@ -632,7 +634,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, /* Check to see if the given chunk is supposed to be authenticated */ -static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) +static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param) { unsigned short len; int found = 0; @@ -641,7 +643,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) if (!param || param->param_hdr.length == 0) return 0; - len = ntohs(param->param_hdr.length) - 
sizeof(sctp_paramhdr_t); + len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr); /* SCTP-AUTH, Section 3.2 * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH @@ -668,7 +670,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) } /* Check if peer requested that this chunk is authenticated */ -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; @@ -680,7 +682,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) } /* Check if we requested that peer authenticate this chunk. */ -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; @@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id) /* Check if we can add this chunk to the array */ param_len = ntohs(p->param_hdr.length); - nchunks = param_len - sizeof(sctp_paramhdr_t); + nchunks = param_len - sizeof(struct sctp_paramhdr); if (nchunks == SCTP_NUM_CHUNK_TYPES) return -EINVAL; @@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, return -EINVAL; for (i = 0; i < hmacs->shmac_num_idents; i++) - ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); - ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + - hmacs->shmac_num_idents * sizeof(__u16)); + ep->auth_hmacs_list->hmac_ids[i] = + htons(hmacs->shmac_idents[i]); + ep->auth_hmacs_list->param_hdr.length = + htons(sizeof(struct sctp_paramhdr) + + hmacs->shmac_num_idents * sizeof(__u16)); return 0; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 697721a7a3f1..1323d41e68b8 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -49,7 +49,7 @@ /* Initialize datamsg from memory. */ static void sctp_datamsg_init(struct sctp_datamsg *msg) { - atomic_set(&msg->refcnt, 1); + refcount_set(&msg->refcnt, 1); msg->send_failed = 0; msg->send_error = 0; msg->can_delay = 1; @@ -136,13 +136,13 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) /* Hold a reference. */ static void sctp_datamsg_hold(struct sctp_datamsg *msg) { - atomic_inc(&msg->refcnt); + refcount_inc(&msg->refcnt); } /* Release a reference. 
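Much of the net/sctp churn in this series is de-typedefing: sctp_paramhdr_t and sctp_chunkhdr_t become plain struct sctp_paramhdr and struct sctp_chunkhdr, sctp_cid_t and sctp_param_t become enum sctp_cid and enum sctp_param, and sizeof(type_t) becomes sizeof(*ptr) or sizeof(var), so the size expression can no longer drift from the object it measures if a declaration changes. The resulting TLV-header idiom, sketched (fill_param is an illustrative helper, not from the patch):

    #include <linux/sctp.h>         /* struct sctp_paramhdr */

    /* sizeof(*hdr) tracks the variable, not a type name. */
    static void fill_param(struct sctp_paramhdr *hdr, __be16 type, int body_len)
    {
            hdr->type = type;
            hdr->length = htons(sizeof(*hdr) + body_len);
    }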
*/ void sctp_datamsg_put(struct sctp_datamsg *msg) { - if (atomic_dec_and_test(&msg->refcnt)) + if (refcount_dec_and_test(&msg->refcnt)) sctp_datamsg_destroy(msg); } @@ -307,7 +307,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = - &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; @@ -320,7 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { struct sctp_stream_out *streamout = - &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3dcd0ecf3d99..0e86f988f836 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, */ auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; auth_hmacs->param_hdr.length = - htons(sizeof(sctp_paramhdr_t) + 2); + htons(sizeof(struct sctp_paramhdr) + 2); auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); /* Initialize the CHUNKS parameter */ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; - auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t)); + auth_chunks->param_hdr.length = + htons(sizeof(struct sctp_paramhdr)); /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks @@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = - htons(sizeof(sctp_paramhdr_t) + 2); + htons(sizeof(struct sctp_paramhdr) + 2); } } @@ -113,7 +114,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->base.type = SCTP_EP_TYPE_SOCKET; /* Initialize the basic object fields. */ - atomic_set(&ep->base.refcnt, 1); + refcount_set(&ep->base.refcnt, 1); ep->base.dead = false; /* Create an input queue. */ @@ -268,16 +269,14 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) memset(ep->secret_key, 0, sizeof(ep->secret_key)); - /* Give up our hold on the sock. */ sk = ep->base.sk; - if (sk != NULL) { - /* Remove and free the port */ - if (sctp_sk(sk)->bind_hash) - sctp_put_port(sk); + /* Remove and free the port */ + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - sock_put(sk); - } + sctp_sk(sk)->ep = NULL; + /* Give up our hold on the sock */ + sock_put(sk); kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); @@ -286,7 +285,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) /* Hold a reference to an endpoint. 
*/ void sctp_endpoint_hold(struct sctp_endpoint *ep) { - atomic_inc(&ep->base.refcnt); + refcount_inc(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are @@ -294,7 +293,7 @@ void sctp_endpoint_hold(struct sctp_endpoint *ep) */ void sctp_endpoint_put(struct sctp_endpoint *ep) { - if (atomic_dec_and_test(&ep->base.refcnt)) + if (refcount_dec_and_test(&ep->base.refcnt)) sctp_endpoint_destroy(ep); } diff --git a/net/sctp/input.c b/net/sctp/input.c index ba9ad32fc447..41eb2ec10460 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -663,19 +663,19 @@ out_unlock: */ static int sctp_rcv_ootb(struct sk_buff *skb) { - sctp_chunkhdr_t *ch, _ch; + struct sctp_chunkhdr *ch, _ch; int ch_end, offset = 0; /* Scan through all the chunks in the packet. */ do { /* Make sure we have at least the header there */ - if (offset + sizeof(sctp_chunkhdr_t) > skb->len) + if (offset + sizeof(_ch) > skb->len) break; ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(_ch)) break; ch_end = offset + SCTP_PAD4(ntohs(ch->length)); @@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, union sctp_addr *paddr = &addr; struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_af *af; /* @@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, /* Find the start of the TLVs and the end of the chunk. This is * the region we search for address parameters. */ - init = (sctp_init_chunk_t *)skb->data; + init = (struct sctp_init_chunk *)skb->data; /* Walk the parameters looking for embedded addresses. */ sctp_walk_params(params, init, init_hdr.params) { @@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, */ static struct sctp_association *__sctp_rcv_asconf_lookup( struct net *net, - sctp_chunkhdr_t *ch, + struct sctp_chunkhdr *ch, const union sctp_addr *laddr, __be16 peer_port, struct sctp_transport **transportp) @@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, struct sctp_transport **transportp) { struct sctp_association *asoc = NULL; - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; int have_auth = 0; unsigned int chunk_num = 1; __u8 *ch_end; @@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, /* Walk through the chunks looking for AUTH or ASCONF chunks * to help us find the association. */ - ch = (sctp_chunkhdr_t *) skb->data; + ch = (struct sctp_chunkhdr *)skb->data; do { /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(*ch)) break; ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); @@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (asoc) break; - ch = (sctp_chunkhdr_t *) ch_end; + ch = (struct sctp_chunkhdr *)ch_end; chunk_num++; } while (ch_end < skb_tail_pointer(skb)); @@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, const union sctp_addr *laddr, struct sctp_transport **transportp) { - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; /* We do not allow GSO frames here as we need to linearize and * then cannot guarantee frame boundaries. 
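sctp_rcv_ootb() above is the template for walking chunks in a packet whose data may be paged or shared: skb_header_pointer() copies each header into a stack buffer when it is not linearly accessible, and the length field is bounds-checked before it is trusted to advance the cursor. The skeleton of that loop, as a standalone illustrative version with the same checks:

    #include <linux/sctp.h>
    #include <linux/skbuff.h>

    static void walk_chunks(struct sk_buff *skb)
    {
            struct sctp_chunkhdr *ch, _ch;
            int offset = 0;

            do {
                    if (offset + sizeof(_ch) > skb->len)
                            break;                  /* no room for a header */
                    ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
                    if (!ch || ntohs(ch->length) < sizeof(_ch))
                            break;                  /* malformed length */
                    /* ... inspect ch->type here ... */
                    offset += SCTP_PAD4(ntohs(ch->length));
            } while (offset < skb->len);
    }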
This shouldn't be an @@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) return NULL; - ch = (sctp_chunkhdr_t *) skb->data; + ch = (struct sctp_chunkhdr *)skb->data; /* The code below will attempt to walk the chunk and extract * parameter information. Before we do that, we need to verify diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index f731de3e8428..48392552ee7c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) { struct sctp_chunk *chunk; - sctp_chunkhdr_t *ch = NULL; + struct sctp_chunkhdr *ch = NULL; chunk = queue->in_progress; /* If there is no more chunks in this packet, say so */ @@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) chunk->pdiscard) return NULL; - ch = (sctp_chunkhdr_t *)chunk->chunk_end; + ch = (struct sctp_chunkhdr *)chunk->chunk_end; return ch; } @@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) { struct sctp_chunk *chunk; - sctp_chunkhdr_t *ch = NULL; + struct sctp_chunkhdr *ch = NULL; /* The assumption is that we are safe to process the chunks * at this time. @@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk = queue->in_progress = NULL; } else { /* Nothing to do. Next chunk in the packet, please. */ - ch = (sctp_chunkhdr_t *) chunk->chunk_end; + ch = (struct sctp_chunkhdr *)chunk->chunk_end; /* Force chunk->skb->data to chunk->chunk_end. */ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); /* We are guaranteed to pull a SCTP header. */ @@ -195,7 +195,7 @@ next_chunk: new_skb: /* This is the first chunk in the packet. */ - ch = (sctp_chunkhdr_t *) chunk->skb->data; + ch = (struct sctp_chunkhdr *)chunk->skb->data; chunk->singleton = 1; chunk->data_accepted = 0; chunk->pdiscard = 0; @@ -214,11 +214,10 @@ new_skb: chunk->chunk_hdr = ch; chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); - skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); + skb_pull(chunk->skb, sizeof(*ch)); chunk->subh.v = NULL; /* Subheader is no longer valid. 
*/ - if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < - skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 4f5a2b580aa5..275925b93b29 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -35,6 +35,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; return sctp_compute_cksum(skb, skb_transport_offset(skb)); } @@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = { }, }; +static const struct skb_checksum_ops crc32c_csum_ops = { + .update = sctp_csum_update, + .combine = sctp_csum_combine, +}; + int __init sctp_offload_init(void) { int ret; @@ -110,6 +116,7 @@ int __init sctp_offload_init(void) if (ret) goto ipv4; + crc32c_csum_stub = &crc32c_csum_ops; return ret; ipv4: diff --git a/net/sctp/output.c b/net/sctp/output.c index 1409a875ad8e..9d8504985744 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) * therefore only reserve a single byte to keep socket around until * the packet has been transmitted. */ - atomic_inc(&sk->sk_wmem_alloc); + refcount_inc(&sk->sk_wmem_alloc); } static int sctp_packet_pack(struct sctp_packet *packet, @@ -463,14 +463,13 @@ merge: padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) - memset(skb_put(chunk->skb, padding), 0, padding); + skb_put_zero(chunk->skb, padding); if (chunk == packet->auth) auth = (struct sctp_auth_chunk *) skb_tail_pointer(nskb); - memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, - chunk->skb->len); + skb_put_data(nskb, chunk->skb->data, chunk->skb->len); pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", chunk, @@ -538,6 +537,7 @@ merge: } else { chksum: head->ip_summed = CHECKSUM_PARTIAL; + head->csum_not_inet = 1; head->csum_start = skb_transport_header(head) - head->head; head->csum_offset = offsetof(struct sctphdr, checksum); } @@ -585,7 +585,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sctp_packet_set_owner_w(head, sk); /* set sctp header */ - sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); + sh = skb_push(head, sizeof(struct sctphdr)); skb_reset_transport_header(head); sh->source = htons(packet->source_port); sh->dest = htons(packet->destination_port); @@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. 
*/ - if (chunk->skb->len + q->out_qlen > - transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4) + if (chunk->skb->len + q->out_qlen > transport->pathmtu - + packet->overhead - sizeof(struct sctp_data_chunk) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index fe4c3d462f6e..e8762702a313 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -363,7 +363,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); - streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; + streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; @@ -400,9 +400,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { struct sctp_stream_out *streamout = - &asoc->stream->out[chk->sinfo.sinfo_stream]; + &asoc->stream.out[chk->sinfo.sinfo_stream]; streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } @@ -1036,7 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { + if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); @@ -1054,7 +1054,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) continue; } - if (asoc->stream->out[sid].state == SCTP_STREAM_CLOSED) { + if (asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) { sctp_outq_head_data(q, chunk); goto sctp_flush_out; } @@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), chunk->skb ? chunk->skb->head : NULL, chunk->skb ? - atomic_read(&chunk->skb->users) : -1); + refcount_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. 
*/ status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a0b29d43627f..26b4be6b4172 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -218,8 +218,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) return -ENOMEM; head = &sctp_ep_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); + read_lock_bh(&head->lock); sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; @@ -234,8 +233,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "\n"); } - read_unlock(&head->lock); - local_bh_enable(); + read_unlock_bh(&head->lock); return 0; } @@ -361,11 +359,11 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->stream->incnt, - assoc->stream->outcnt, assoc->max_retrans, + assoc->hbinterval, assoc->stream.incnt, + assoc->stream.outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, - atomic_read(&sk->sk_wmem_alloc), + refcount_read(&sk->sk_wmem_alloc), sk->sk_wmem_queued, sk->sk_sndbuf, sk->sk_rcvbuf); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 92e332e17391..4e16b02ed832 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, { struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep = asoc->ep; - sctp_inithdr_t init; + struct sctp_inithdr init; union sctp_params addrs; size_t chunksize; struct sctp_chunk *retval = NULL; @@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_supported_ext_param_t ext_param; int num_ext = 0; __u8 extensions[3]; - sctp_paramhdr_t *auth_chunks = NULL, + struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL; /* RFC 2960 3.3.2 Initiation (INIT) (1) @@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += sizeof(asoc->c.auth_random); /* Add HMACS parameter length if any were defined */ - auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ - auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else @@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, gfp_t gfp, int unkparam_len) { - sctp_inithdr_t initack; + struct sctp_inithdr initack; struct sctp_chunk *retval; union sctp_params addrs; struct sctp_sock *sp; @@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_supported_ext_param_t ext_param; int num_ext = 0; __u8 extensions[3]; - sctp_paramhdr_t *auth_chunks = NULL, + struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL, *auth_random = NULL; @@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, chunksize += sizeof(aiparam); if (asoc->peer.auth_capable) { - auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; + auth_random = (struct sctp_paramhdr *)asoc->c.auth_random; chunksize += 
ntohs(auth_random->length); - auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; - auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else @@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation( struct sctp_chunk *retval; struct sctp_paramhdr phdr; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen - + sizeof(sctp_paramhdr_t)); + retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen + + sizeof(phdr)); if (!retval) goto end; - sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen - + sizeof(sctp_paramhdr_t)); + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen + + sizeof(phdr)); phdr.type = htons(chunk->chunk_hdr->type); phdr.length = chunk->chunk_hdr->length; sctp_addto_chunk(retval, paylen, payload); - sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr); + sctp_addto_param(retval, sizeof(phdr), &phdr); end: return retval; @@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen( struct sctp_chunk *retval; static const char error[] = "The following parameter had invalid length:"; size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + - sizeof(sctp_paramhdr_t); + sizeof(*param); retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) goto nodata; sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); + sizeof(error) + sizeof(*param)); sctp_addto_chunk(retval, sizeof(error), error); - sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + sctp_addto_param(retval, sizeof(*param), param); nodata: return retval; @@ -1296,8 +1296,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), &auth_hdr); - hmac = skb_put(retval->skb, hmac_desc->hmac_len); - memset(hmac, 0, hmac_desc->hmac_len); + hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len); /* Adjust the chunk header to include the empty MAC */ retval->chunk_hdr->length = @@ -1346,7 +1345,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->transmitted_list); INIT_LIST_HEAD(&retval->frag_list); SCTP_DBG_OBJCNT_INC(chunk); - atomic_set(&retval->refcnt, 1); + refcount_set(&retval->refcnt, 1); nodata: return retval; @@ -1380,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, gfp_t gfp) { struct sctp_chunk *retval; - sctp_chunkhdr_t *chunk_hdr; + struct sctp_chunkhdr *chunk_hdr; struct sk_buff *skb; struct sock *sk; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp); if (!skb) goto nodata; /* Make room for the chunk header. */ - chunk_hdr = (sctp_chunkhdr_t *)skb_put(skb, sizeof(sctp_chunkhdr_t)); + chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr)); chunk_hdr->type = type; chunk_hdr->flags = flags; - chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t)); + chunk_hdr->length = htons(sizeof(*chunk_hdr)); sk = asoc ? 
asoc->base.sk : NULL; retval = sctp_chunkify(skb, asoc, sk, gfp); @@ -1403,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, } retval->chunk_hdr = chunk_hdr; - retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr); + retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr); /* Determine if the chunk needs to be authenticated */ if (sctp_auth_send_cid(type, asoc)) @@ -1459,13 +1458,13 @@ void sctp_chunk_free(struct sctp_chunk *chunk) /* Grab a reference to the chunk. */ void sctp_chunk_hold(struct sctp_chunk *ch) { - atomic_inc(&ch->refcnt); + refcount_inc(&ch->refcnt); } /* Release a reference to the chunk. */ void sctp_chunk_put(struct sctp_chunk *ch) { - if (atomic_dec_and_test(&ch->refcnt)) + if (refcount_dec_and_test(&ch->refcnt)) sctp_chunk_destroy(ch); } @@ -1475,15 +1474,11 @@ void sctp_chunk_put(struct sctp_chunk *ch) void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) { void *target; - void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); int padlen = SCTP_PAD4(chunklen) - chunklen; - padding = skb_put(chunk->skb, padlen); - target = skb_put(chunk->skb, len); - - memset(padding, 0, padlen); - memcpy(target, data, len); + skb_put_zero(chunk->skb, padlen); + target = skb_put_data(chunk->skb, data, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + padlen + len); @@ -1544,7 +1539,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) /* All fragments will be on the same stream */ sid = ntohs(chunk->subh.data_hdr->stream); - stream = chunk->asoc->stream; + stream = &chunk->asoc->stream; /* Now assign the sequence number to the entire message. * All fragments must have the same stream sequence number. @@ -1619,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Header size is static data prior to the actual cookie, including * any padding. */ - headersize = sizeof(sctp_paramhdr_t) + + headersize = sizeof(struct sctp_paramhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = sizeof(struct sctp_cookie) @@ -1715,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie( /* Header size is static data prior to the actual cookie, including * any padding. */ - headersize = sizeof(sctp_chunkhdr_t) + + headersize = sizeof(struct sctp_chunkhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = ntohs(chunk->chunk_hdr->length) - headersize; @@ -1887,7 +1882,7 @@ struct __sctp_missing { * Report a missing mandatory parameter. 
*/ static int sctp_process_missing_param(const struct sctp_association *asoc, - sctp_param_t paramtype, + enum sctp_param paramtype, struct sctp_chunk *chunk, struct sctp_chunk **errp) { @@ -1980,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, static int sctp_verify_ext_param(struct net *net, union sctp_params param) { - __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int have_auth = 0; int have_asconf = 0; int i; @@ -2015,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { struct net *net = sock_net(asoc->base.sk); - __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int i; for (i = 0; i < num_ext; i++) { @@ -2128,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, union sctp_params param, - sctp_cid_t cid, + enum sctp_cid cid, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk) { @@ -2185,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, * cause 'Protocol Violation'. */ if (SCTP_AUTH_RANDOM_LENGTH != - ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) { + ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) { sctp_process_inv_paramlength(asoc, param.p, chunk, err_chunk); retval = SCTP_IERROR_ABORT; @@ -2213,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net, goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; - n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(param.p->length) - + sizeof(struct sctp_paramhdr)) >> 1; /* SCTP-AUTH: Section 6.1 * The HMAC algorithm based on SHA-1 MUST be supported and @@ -2245,9 +2241,9 @@ fallthrough: /* Verify the INIT packet before we process it. */ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, - const struct sctp_association *asoc, sctp_cid_t cid, - sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, - struct sctp_chunk **errp) + const struct sctp_association *asoc, enum sctp_cid cid, + struct sctp_init_chunk *peer_init, + struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; bool has_cookie = false; @@ -2311,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, */ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, - sctp_init_chunk_t *peer_init, gfp_t gfp) + struct sctp_init_chunk *peer_init, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); union sctp_params param; @@ -2454,7 +2450,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * stream sequence number shall be set to 0. */ - if (sctp_stream_init(asoc, gfp)) + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, gfp)) goto clean_up; if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) @@ -2569,7 +2566,7 @@ do_addr_param: asoc->peer.ipv4_address = 1; /* Cycle through address types; avoid divide by 0. 
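Worked example of the arithmetic in the lines that follow: a parameter's length field counts the 4-byte struct sctp_paramhdr plus the body, so a supported-address-types parameter listing two 16-bit address types carries length = 4 + 2 * 2 = 8; subtracting the header leaves sat = 4 bytes, and dividing by sizeof(__u16) recovers the 2 entries, with the if (sat) test keeping the degenerate empty-parameter case well-defined.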
*/ - sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); if (sat) sat /= sizeof(__u16); @@ -2596,7 +2593,7 @@ do_addr_param: case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = - ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + ntohs(param.p->length) - sizeof(struct sctp_paramhdr); asoc->peer.cookie = param.cookie->body; break; @@ -3180,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, return false; length = ntohs(param.addip->param_hdr.length); if (length < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) + sizeof(**errp)) return false; break; case SCTP_PARAM_SUCCESS_REPORT: @@ -3222,7 +3219,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, int chunk_len; __u32 serial; - chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); + chunk_len = ntohs(asconf->chunk_hdr->length) - + sizeof(struct sctp_chunkhdr); hdr = (sctp_addiphdr_t *)asconf->skb->data; serial = ntohl(hdr->serial); @@ -3368,7 +3366,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, err_code = SCTP_ERROR_REQ_REFUSED; asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 25384fa82ba9..d6e5e9e0fd6d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands, static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_init_chunk_t *peer_init, + struct sctp_init_chunk *peer_init, gfp_t gfp) { int error; @@ -818,6 +818,28 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; } +static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, + struct sctp_association *asoc, + struct sctp_association *new) +{ + struct net *net = sock_net(asoc->base.sk); + struct sctp_chunk *abort; + + if (!sctp_assoc_update(asoc, new)) + return; + + abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t)); + if (abort) { + sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); + sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + } + sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_RSRC_LOW)); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); +} + /* Helper function to change the state of an association. 
*/ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, @@ -933,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, switch (err_hdr->cause) { case SCTP_ERROR_UNKNOWN_CHUNK: { - sctp_chunkhdr_t *unk_chunk_hdr; + struct sctp_chunkhdr *unk_chunk_hdr; - unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable; + unk_chunk_hdr = (struct sctp_chunkhdr *) + err_hdr->variable; switch (unk_chunk_hdr->type) { /* ADDIP 4.1 A9) If the peer responds to an ASCONF with * an ERROR chunk reporting that it did not recognized @@ -1294,7 +1317,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_UPDATE_ASSOC: - sctp_assoc_update(asoc, cmd->obj.asoc); + sctp_cmd_assoc_update(commands, asoc, cmd->obj.asoc); break; case SCTP_CMD_PURGE_OUTQUEUE: @@ -1748,6 +1771,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_SET_ASOC: + if (asoc && local_cork) { + sctp_outq_uncork(&asoc->outqueue, gfp); + local_cork = 0; + } asoc = cmd->obj.asoc; break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f863b5573e42..b2a74c3823ee 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net, return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, * error, but since we don't have an association, we'll * just discard the packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back @@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. @@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); @@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, } /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. */ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC); if (!new_asoc) @@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* The call, sctp_process_init(), can fail on memory allocation. 
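The minimum lengths being checked differ by chunk type for a reason: SHUTDOWN-COMPLETE, COOKIE-ACK and COOKIE-ECHO only need the 4-byte chunk header, while INIT and INIT-ACK must also carry the fixed struct sctp_inithdr (init tag, a_rwnd, stream counts, initial TSN) before any variable parameters. A compile-time view of those minimums, assuming the layouts in include/linux/sctp.h:

    #include <linux/bug.h>
    #include <linux/compiler.h>
    #include <linux/sctp.h>

    static void __maybe_unused sctp_size_sanity(void)
    {
            /* type + flags + length */
            BUILD_BUG_ON(sizeof(struct sctp_chunkhdr) != 4);
            /* an INIT additionally carries the fixed init header */
            BUILD_BUG_ON(sizeof(struct sctp_init_chunk) !=
                         sizeof(struct sctp_chunkhdr) +
                         sizeof(struct sctp_inithdr));
    }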
*/ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), - (sctp_init_chunk_t *)chunk->chunk_hdr, + (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem_init; @@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, len = 0; if (err_chunk) len = ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) @@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, */ unk_param = (sctp_unrecognized_param_t *) ((__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ @@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_init_chunk_t *initchunk; + struct sctp_init_chunk *initchunk; struct sctp_chunk *err_chunk; struct sctp_packet *packet; @@ -522,12 +522,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { sctp_error_t error = SCTP_ERROR_NO_RESOURCE; @@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); @@ -576,9 +576,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Tag the variable length parameters. Note that we never * convert the parameters in an INIT chunk. */ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); - initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr; + initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr; sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, SCTP_PEER_INIT(initchunk)); @@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, { struct sctp_chunk *chunk = arg; struct sctp_association *new_asoc; - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_chunk *repl; struct sctp_ulpevent *ev, *ai_ev = NULL; int error = 0; @@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, * chunk header. More detailed verification is done * in sctp_unpack_cookie(). 
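sctp_abort_pkt_new() above is handed the error-cause payload by stepping over the fixed chunk header, an idiom these hunks repeat many times. Spelled out on its own (struct sctp_chunkhdr is the four-byte type/flags/length header, and the chunk's on-wire length field covers that header, so both pointer and length move by the same amount):

	struct sctp_chunkhdr *hdr = err_chunk->chunk_hdr;

	/* Payload starts immediately after the fixed header... */
	__u8 *payload = (__u8 *)hdr + sizeof(struct sctp_chunkhdr);
	/* ...and the on-wire length includes the header, so strip it. */
	__u16 paylen = ntohs(hdr->length) - sizeof(struct sctp_chunkhdr);
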
*/ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations @@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t))) + sizeof(struct sctp_chunkhdr))) goto nomem; /* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint @@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; auth.sctp_hdr = chunk->sctp_hdr; - auth.chunk_hdr = (sctp_chunkhdr_t *)skb_push(chunk->auth_chunk, - sizeof(sctp_chunkhdr_t)); - skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); + auth.chunk_hdr = (struct sctp_chunkhdr *) + skb_push(chunk->auth_chunk, + sizeof(struct sctp_chunkhdr)); + skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); @@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1080,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - sctp_paramhdr_t *param_hdr; + struct sctp_paramhdr *param_hdr; struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; size_t paylen = 0; @@ -1097,9 +1098,9 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ - chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data; - param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr; - paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); + chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data; + param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; + paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); if (ntohs(param_hdr->length) > paylen) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, @@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) + sizeof(sctp_sender_hb_info_t))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1449,19 +1450,19 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * In this case, we generate a protocol violation since we have * an association established. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. 
*/ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. @@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, @@ -1508,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * place (local tie-tag and per tie-tag) within the state cookie. */ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), - (sctp_init_chunk_t *)chunk->chunk_hdr, + (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem; @@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( len = 0; if (err_chunk) { len = ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); } repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); @@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( */ unk_param = (sctp_unrecognized_param_t *) ((__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ @@ -1729,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_ulpevent *ev; struct sctp_chunk *repl; struct sctp_chunk *err; @@ -1844,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_chunk *repl; /* new_asoc is a brand-new association, so these are not yet @@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, * enough for the chunk header. Cookie length verification is * done later. 
*/ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, */ chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t))) + sizeof(struct sctp_chunkhdr))) goto nomem; /* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie @@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, struct sctp_chunk *reply; /* Make sure that the chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2989,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3009,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, - (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); + (u8 *)chunk->subh.data_hdr, + sizeof(struct sctp_datahdr)); default: BUG(); } @@ -3107,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3123,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, - (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); + (u8 *)chunk->subh.data_hdr, + sizeof(struct sctp_datahdr)); default: BUG(); } @@ -3358,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_ACK chunk has a valid length. 
*/ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 10.2 H) SHUTDOWN COMPLETE notification @@ -3435,7 +3438,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; sctp_errhdr_t *err; __u8 *ch_end; int ootb_shut_ack = 0; @@ -3443,10 +3446,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less than minimal */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(*ch)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3487,7 +3490,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - ch = (sctp_chunkhdr_t *) ch_end; + ch = (struct sctp_chunkhdr *)ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) @@ -3560,7 +3563,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, /* If the chunk length is invalid, we don't want to process * the rest of the packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* We need to discard the rest of the packet to prevent @@ -3591,7 +3594,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, struct sctp_chunk *chunk = arg; /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3958,7 +3961,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream->incnt) + if (ntohs(skip->stream) >= asoc->stream.incnt) goto discard_noforce; } @@ -4029,7 +4032,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream->incnt) + if (ntohs(skip->stream) >= asoc->stream.incnt) goto gen_shutdown; } @@ -4256,7 +4259,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, { struct sctp_chunk *unk_chunk = arg; struct sctp_chunk *err_chunk; - sctp_chunkhdr_t *hdr; + struct sctp_chunkhdr *hdr; pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk); @@ -4267,7 +4270,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ - if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4340,7 +4343,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. 
*/ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4405,7 +4408,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net, struct sctp_chunk *chunk = arg; /* Make sure that the chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -6121,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT: { - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; - init = (sctp_init_chunk_t *)chunk->chunk_hdr; + init = (struct sctp_init_chunk *)chunk->chunk_hdr; vtag = ntohl(init->init_hdr.init_tag); break; } @@ -6196,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands) { - sctp_datahdr_t *data_hdr; + struct sctp_datahdr *data_hdr; struct sctp_chunk *err; size_t datalen; sctp_verb_t deliver; @@ -6209,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, u16 sid; u8 ordered = 0; - data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); + data_hdr = (struct sctp_datahdr *)chunk->skb->data; + chunk->subh.data_hdr = data_hdr; + skb_pull(chunk->skb, sizeof(*data_hdr)); tsn = ntohl(data_hdr->tsn); pr_debug("%s: TSN 0x%x\n", __func__, tsn); @@ -6258,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * Actually, allow a little bit of overflow (up to a MTU). */ datalen = ntohs(chunk->chunk_hdr->length); - datalen -= sizeof(sctp_data_chunk_t); + datalen -= sizeof(struct sctp_data_chunk); deliver = SCTP_CMD_CHUNK_ULP; @@ -6365,7 +6369,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->stream->incnt) { + if (sid >= asoc->stream.incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); @@ -6387,7 +6391,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and is invalid. */ ssn = ntohs(data_hdr->ssn); - if (ordered && SSN_lt(ssn, sctp_ssn_peek(asoc->stream, in, sid))) + if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid))) return SCTP_IERROR_PROTO_VIOLATION; /* Send the data up to the user. 
Note: Schedule the diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 419b18ebb056..3e958c1c4b95 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - sctp_cid_t cid, + enum sctp_cid cid, sctp_state_t state); @@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S }; static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - sctp_cid_t cid, + enum sctp_cid cid, sctp_state_t state) { if (state > SCTP_STATE_MAX) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3a8318e518f1..1db478e34520 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -103,7 +103,7 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -static int sctp_memory_pressure; +static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; @@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sk_buff) + sizeof(struct sctp_chunk); - atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); sk->sk_wmem_queued += chunk->skb->truesize; sk_mem_charge(sk, chunk->skb->truesize); } @@ -1494,7 +1494,7 @@ static void sctp_close(struct sock *sk, long timeout) pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_state = SCTP_SS_CLOSING; @@ -1544,7 +1544,7 @@ static void sctp_close(struct sock *sk, long timeout) * held and that should be grabbed before socket lock. */ spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock(sk); + bh_lock_sock_nested(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. 
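The "invalid stream" check just below now reads the stream table embedded in the association (asoc->stream.outcnt). The userspace contract is unchanged: sending on a stream id the association never negotiated fails with EINVAL. A hedged sketch using the lksctp-tools sctp_sendmsg() wrapper (assumed available; an association negotiated with ten outbound streams):

	#include <errno.h>
	#include <netinet/sctp.h>

	ssize_t n = sctp_sendmsg(sd, buf, buflen, NULL, 0,
				 0,	/* ppid */
				 0,	/* flags */
				 42,	/* stream id >= negotiated outcnt */
				 0,	/* timetolive */
				 0);	/* context */
	if (n < 0 && errno == EINVAL)
		; /* stream id out of range for this association */
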
*/ - if (sinfo->sinfo_stream >= asoc->stream->outcnt) { + if (sinfo->sinfo_stream >= asoc->stream.outcnt) { err = -EINVAL; goto out_free; } @@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->stream->incnt; - info->sctpi_outstrms = asoc->stream->outcnt; + info->sctpi_instrms = asoc->stream.incnt; + info->sctpi_outstrms = asoc->stream.outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4726,8 +4726,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->stream->incnt; - status.sstat_outstrms = asoc->stream->outcnt; + status.sstat_instrms = asoc->stream.incnt; + status.sstat_outstrms = asoc->stream.outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, @@ -4933,11 +4933,47 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) } EXPORT_SYMBOL(sctp_do_peeloff); +static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff, + struct file **newfile, unsigned flags) +{ + struct socket *newsock; + int retval; + + retval = sctp_do_peeloff(sk, peeloff->associd, &newsock); + if (retval < 0) + goto out; + + /* Map the socket to an unused fd that can be returned to the user. */ + retval = get_unused_fd_flags(flags & SOCK_CLOEXEC); + if (retval < 0) { + sock_release(newsock); + goto out; + } + + *newfile = sock_alloc_file(newsock, 0, NULL); + if (IS_ERR(*newfile)) { + put_unused_fd(retval); + sock_release(newsock); + retval = PTR_ERR(*newfile); + *newfile = NULL; + return retval; + } + + pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, + retval); + + peeloff->sd = retval; + + if (flags & SOCK_NONBLOCK) + (*newfile)->f_flags |= O_NONBLOCK; +out: + return retval; +} + static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen) { sctp_peeloff_arg_t peeloff; - struct socket *newsock; - struct file *newfile; + struct file *newfile = NULL; int retval = 0; if (len < sizeof(sctp_peeloff_arg_t)) @@ -4946,26 +4982,44 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval if (copy_from_user(&peeloff, optval, len)) return -EFAULT; - retval = sctp_do_peeloff(sk, peeloff.associd, &newsock); + retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0); if (retval < 0) goto out; - /* Map the socket to an unused fd that can be returned to the user. */ - retval = get_unused_fd_flags(0); - if (retval < 0) { - sock_release(newsock); - goto out; + /* Return the fd mapped to the new socket. 
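The peeloff path is refactored into sctp_getsockopt_peeloff_common() so that the SCTP_SOCKOPT_PEELOFF_FLAGS variant added just below can apply SOCK_NONBLOCK and SOCK_CLOEXEC to the peeled-off descriptor before it is installed, leaving no window in which the fd exists without those flags. A hedged userspace sketch, assuming sctp_peeloff_flags_arg_t is exported through the uapi headers as in this series:

	sctp_peeloff_flags_arg_t arg = {
		.p_arg.associd = assoc_id,
		.flags = SOCK_NONBLOCK | SOCK_CLOEXEC,
	};
	socklen_t len = sizeof(arg);
	int peeled_fd;

	/* On success getsockopt() returns the new descriptor number and
	 * also stores it in arg.p_arg.sd. */
	peeled_fd = getsockopt(sd, IPPROTO_SCTP, SCTP_SOCKOPT_PEELOFF_FLAGS,
			       &arg, &len);
	if (peeled_fd < 0)
		perror("SCTP_SOCKOPT_PEELOFF_FLAGS");
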
*/ + if (put_user(len, optlen)) { + fput(newfile); + put_unused_fd(retval); + return -EFAULT; } - newfile = sock_alloc_file(newsock, 0, NULL); - if (IS_ERR(newfile)) { + if (copy_to_user(optval, &peeloff, len)) { + fput(newfile); put_unused_fd(retval); - sock_release(newsock); - return PTR_ERR(newfile); + return -EFAULT; } + fd_install(retval, newfile); +out: + return retval; +} - pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, - retval); +static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + sctp_peeloff_flags_arg_t peeloff; + struct file *newfile = NULL; + int retval = 0; + + if (len < sizeof(sctp_peeloff_flags_arg_t)) + return -EINVAL; + len = sizeof(sctp_peeloff_flags_arg_t); + if (copy_from_user(&peeloff, optval, len)) + return -EFAULT; + + retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg, + &newfile, peeloff.flags); + if (retval < 0) + goto out; /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { @@ -4973,7 +5027,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval put_unused_fd(retval); return -EFAULT; } - peeloff.sd = retval; + if (copy_to_user(optval, &peeloff, len)) { fput(newfile); put_unused_fd(retval); @@ -6033,7 +6087,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EACCES; hmacs = ep->auth_hmacs_list; - data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); + data_len = ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_hmacalgo) + data_len) return -EINVAL; @@ -6117,7 +6172,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, goto num; /* See if the user provided enough room for all the data */ - num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); + num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < num_chunks) return -EINVAL; @@ -6165,7 +6220,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (!ch) goto num; - num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); + num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_authchunks) + num_chunks) return -EINVAL; @@ -6599,10 +6654,10 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); - if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamout = &asoc->stream->out[params.sprstat_sid]; + streamout = &asoc->stream.out[params.sprstat_sid]; if (policy == SCTP_PR_SCTP_NONE) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; @@ -6758,6 +6813,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_SOCKOPT_PEELOFF: retval = sctp_getsockopt_peeloff(sk, len, optval, optlen); break; + case SCTP_SOCKOPT_PEELOFF_FLAGS: + retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen); + break; case SCTP_PEER_ADDR_PARAMS: retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); @@ -7563,7 +7621,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (flags & MSG_PEEK) { skb = skb_peek(&sk->sk_receive_queue); if (skb) - atomic_inc(&skb->users); + refcount_inc(&skb->users); } else { skb = __skb_dequeue(&sk->sk_receive_queue); } @@ -7684,7 +7742,7 @@ static void sctp_wfree(struct sk_buff *skb) sizeof(struct sk_buff) + 
sizeof(struct sctp_chunk); - atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); /* * This undoes what is done via sctp_set_owner_w and sk_mem_charge diff --git a/net/sctp/stream.c b/net/sctp/stream.c index dda53a293986..63ea15503714 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,70 +35,43 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) +int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + gfp_t gfp) { - struct sctp_stream *stream; - int i; - - stream = kzalloc(sizeof(*stream), gfp); - if (!stream) - return -ENOMEM; - - stream->outcnt = asoc->c.sinit_num_ostreams; - stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); - if (!stream->out) { - kfree(stream); - return -ENOMEM; - } - for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; - - asoc->stream = stream; - - return 0; -} - -int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) -{ - struct sctp_stream *stream = asoc->stream; int i; /* Initial stream->out size may be very big, so free it and alloc * a new one with new outcnt to save memory. */ kfree(stream->out); - stream->outcnt = asoc->c.sinit_num_ostreams; - stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + + stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); if (!stream->out) - goto nomem; + return -ENOMEM; + stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = asoc->c.sinit_max_instreams; - stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); + if (!incnt) + return 0; + + stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - goto nomem; + stream->out = NULL; + return -ENOMEM; } - return 0; - -nomem: - asoc->stream = NULL; - kfree(stream); + stream->incnt = incnt; - return -ENOMEM; + return 0; } void sctp_stream_free(struct sctp_stream *stream) { - if (unlikely(!stream)) - return; - kfree(stream->out); kfree(stream->in); - kfree(stream); } void sctp_stream_clear(struct sctp_stream *stream) @@ -112,6 +85,19 @@ void sctp_stream_clear(struct sctp_stream *stream) stream->in[i].ssn = 0; } +void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) +{ + sctp_stream_free(stream); + + stream->out = new->out; + stream->in = new->in; + stream->outcnt = new->outcnt; + stream->incnt = new->incnt; + + new->out = NULL; + new->in = NULL; +} + static int sctp_send_reconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { @@ -128,7 +114,7 @@ static int sctp_send_reconf(struct sctp_association *asoc, int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params) { - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u16 i, str_nums, *str_list; struct sctp_chunk *chunk; int retval = -EINVAL; @@ -214,6 +200,7 @@ out: int sctp_send_reset_assoc(struct sctp_association *asoc) { + struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; int retval; __u16 i; @@ -230,8 +217,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) return -ENOMEM; /* Block further xmit of data until this request is completed */ - for (i = 0; i < asoc->stream->outcnt; i++) - asoc->stream->out[i].state = SCTP_STREAM_CLOSED; + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_CLOSED; 
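The net/sctp/stream.c changes here finish converting the per-association stream table from a separately kzalloc'd object into a member embedded in struct sctp_association, which is why every asoc->stream->X in the surrounding files became asoc->stream.X. A sketch of the ownership change (layouts abridged):

	/* Before: an extra allocation, an extra pointer chase, and a
	 * NULL state to reason about. */
	struct sctp_association {
		struct sctp_stream *stream;	/* from sctp_stream_new() */
	};

	/* After: the table lives inside the association itself;
	 * sctp_stream_init() only allocates the in/out arrays, and
	 * helpers take its address: */
	struct sctp_association {
		struct sctp_stream stream;
	};

	struct sctp_stream *stream = &asoc->stream;
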
asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); @@ -241,8 +228,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; - for (i = 0; i < asoc->stream->outcnt; i++) - asoc->stream->out[i].state = SCTP_STREAM_OPEN; + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; return retval; } @@ -255,7 +242,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) int sctp_send_add_streams(struct sctp_association *asoc, struct sctp_add_streams *params) { - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; int retval = -ENOMEM; __u32 outcnt, incnt; @@ -317,7 +304,7 @@ out: return retval; } -static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( +static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( struct sctp_association *asoc, __u32 resp_seq, __be16 type) { @@ -357,7 +344,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_ulpevent **evp) { struct sctp_strreset_outreq *outreq = param.v; - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u16 i, nums, flags = 0, *str_p = NULL; __u32 result = SCTP_STRRESET_DENIED; __u32 request_seq; @@ -449,7 +436,7 @@ struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_ulpevent **evp) { struct sctp_strreset_inreq *inreq = param.v; - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; __u16 i, nums, *str_p; @@ -523,7 +510,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( { __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen; struct sctp_strreset_tsnreq *tsnreq = param.v; - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; __u32 request_seq; __u16 i; @@ -612,7 +599,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_ulpevent **evp) { struct sctp_strreset_addstrm *addstrm = param.v; - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_stream_in *streamin; __u32 request_seq, incnt; @@ -687,7 +674,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_ulpevent **evp) { struct sctp_strreset_addstrm *addstrm = param.v; - struct sctp_stream *stream = asoc->stream; + struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_stream_out *streamout; struct sctp_chunk *chunk = NULL; @@ -758,11 +745,11 @@ struct sctp_chunk *sctp_process_strreset_resp( union sctp_params param, struct sctp_ulpevent **evp) { + struct sctp_stream *stream = &asoc->stream; struct sctp_strreset_resp *resp = param.v; - struct sctp_stream *stream = asoc->stream; struct sctp_transport *t; __u16 i, nums, flags = 0; - sctp_paramhdr_t *req; + struct sctp_paramhdr *req; __u32 result; req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 721eeebfcd8a..80a97c8501a7 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -99,7 +99,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, /* Initialize the 64-bit random nonce sent with heartbeat. 
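The transport hunks that follow, like the sk_wmem_alloc, skb->users and auth_gss changes elsewhere in this merge, are instances of the tree-wide atomic_t -> refcount_t conversion: identical fast-path behaviour, but increments from zero and over/underflow now saturate and WARN instead of silently wrapping into a use-after-free. The generic shape of the pattern (struct obj is illustrative):

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t refcnt;	/* refcount_set(&o->refcnt, 1) at init */
	};

	/* Take a reference only while the object is still live. */
	static struct obj *obj_get(struct obj *o)
	{
		return refcount_inc_not_zero(&o->refcnt) ? o : NULL;
	}

	/* Drop a reference; free on the final put. */
	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->refcnt))
			kfree(o);
	}
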
*/ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); - atomic_set(&peer->refcnt, 1); + refcount_set(&peer->refcnt, 1); return peer; } @@ -172,7 +172,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) */ static void sctp_transport_destroy(struct sctp_transport *transport) { - if (unlikely(atomic_read(&transport->refcnt))) { + if (unlikely(refcount_read(&transport->refcnt))) { WARN(1, "Attempt to destroy undead transport %p!\n", transport); return; } @@ -311,7 +311,7 @@ void sctp_transport_route(struct sctp_transport *transport, /* Hold a reference to a transport. */ int sctp_transport_hold(struct sctp_transport *transport) { - return atomic_add_unless(&transport->refcnt, 1, 0); + return refcount_inc_not_zero(&transport->refcnt); } /* Release a reference to a transport and clean up @@ -319,7 +319,7 @@ int sctp_transport_hold(struct sctp_transport *transport) */ void sctp_transport_put(struct sctp_transport *transport) { - if (atomic_dec_and_test(&transport->refcnt)) + if (refcount_dec_and_test(&transport->refcnt)) sctp_transport_destroy(transport); } @@ -405,14 +405,6 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) asoc->fast_recovery = 0; - /* The appropriate cwnd increase algorithm is performed if, and only - * if the cumulative TSN whould advanced and the congestion window is - * being fully utilized. - */ - if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || - (flight_size < cwnd)) - return; - ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; pmtu = transport->asoc->pathmtu; @@ -435,6 +427,14 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, if (asoc->fast_recovery) return; + /* The appropriate cwnd increase algorithm is performed + * if, and only if the congestion window is being fully + * utilized. Note that RFC4960 Errata 3.22 removed the + * other condition on ctsn moving. + */ + if (flight_size < cwnd) + return; + if (bytes_acked > pmtu) cwnd += pmtu; else @@ -446,23 +446,33 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, - * upon each SACK arrival that advances the Cumulative TSN Ack - * Point, increase partial_bytes_acked by the total number of - * bytes of all new chunks acknowledged in that SACK including - * chunks acknowledged by the new Cumulative TSN Ack and by - * Gap Ack Blocks. + * upon each SACK arrival, increase partial_bytes_acked + * by the total number of bytes of all new chunks + * acknowledged in that SACK including chunks + * acknowledged by the new Cumulative TSN Ack and by Gap + * Ack Blocks. (updated by RFC4960 Errata 3.22) + * + * When partial_bytes_acked is greater than cwnd and + * before the arrival of the SACK the sender had less + * bytes of data outstanding than cwnd (i.e., before + * arrival of the SACK, flightsize was less than cwnd), + * reset partial_bytes_acked to cwnd. (RFC 4960 Errata + * 3.26) * - * When partial_bytes_acked is equal to or greater than cwnd - * and before the arrival of the SACK the sender had cwnd or - * more bytes of data outstanding (i.e., before arrival of the - * SACK, flightsize was greater than or equal to cwnd), - * increase cwnd by MTU, and reset partial_bytes_acked to - * (partial_bytes_acked - cwnd). 
+ * When partial_bytes_acked is equal to or greater than + * cwnd and before the arrival of the SACK the sender + * had cwnd or more bytes of data outstanding (i.e., + * before arrival of the SACK, flightsize was greater + * than or equal to cwnd), partial_bytes_acked is reset + * to (partial_bytes_acked - cwnd). Next, cwnd is + * increased by MTU. (RFC 4960 Errata 3.12) */ pba += bytes_acked; - if (pba >= cwnd) { + if (pba > cwnd && flight_size < cwnd) + pba = cwnd; + if (pba >= cwnd && flight_size >= cwnd) { + pba = pba - cwnd; cwnd += pmtu; - pba = ((cwnd < pba) ? (pba - cwnd) : 0); } pr_debug("%s: congestion avoidance: transport:%p, " @@ -559,6 +569,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, */ transport->cwnd = max(transport->cwnd/2, 4*asoc->pathmtu); + /* RFC 4960 Errata 3.27.2: also adjust sshthresh */ + transport->ssthresh = transport->cwnd; break; } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ec2b3e013c2f..5f86c5062a98 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -153,13 +153,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); /* Include the notification structure */ - sac = (struct sctp_assoc_change *) - skb_push(skb, sizeof(struct sctp_assoc_change)); + sac = skb_push(skb, sizeof(struct sctp_assoc_change)); /* Trim the buffer to the right length. */ skb_trim(skb, sizeof(struct sctp_assoc_change) + ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); } else { event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change), MSG_NOTIFICATION, gfp); @@ -167,8 +166,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( goto fail; skb = sctp_event2skb(event); - sac = (struct sctp_assoc_change *) skb_put(skb, - sizeof(struct sctp_assoc_change)); + sac = skb_put(skb, sizeof(struct sctp_assoc_change)); } /* Socket Extensions for SCTP @@ -270,8 +268,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( goto fail; skb = sctp_event2skb(event); - spc = (struct sctp_paddr_change *) - skb_put(skb, sizeof(struct sctp_paddr_change)); + spc = skb_put(skb, sizeof(struct sctp_paddr_change)); /* Sockets API Extensions for SCTP * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE @@ -402,7 +399,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); + sre = skb_push(skb, sizeof(*sre)); /* Trim the buffer to the right length. 
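The congestion-avoidance rewrite in net/sctp/transport.c above folds in three RFC 4960 errata: partial_bytes_acked now grows on every SACK arrival (3.22), is clamped to cwnd when the window was not fully used (3.26), and is reduced by cwnd before cwnd grows by one MTU (3.12). Restated as a standalone helper for readability (illustrative only; types simplified):

	static u32 ca_raise_cwnd(u32 cwnd, u32 pmtu, u32 flight_size,
				 u32 bytes_acked, u32 *pba)
	{
		*pba += bytes_acked;		/* every SACK: errata 3.22 */

		if (*pba > cwnd && flight_size < cwnd)
			*pba = cwnd;		/* errata 3.26 */

		if (*pba >= cwnd && flight_size >= cwnd) {
			*pba -= cwnd;		/* errata 3.12 */
			cwnd += pmtu;
		}
		return cwnd;
	}

For example, with cwnd = 4000, pmtu = 1500, flight_size = 4200, bytes_acked = 1200 and *pba = 3000: *pba becomes 4200, both conditions in the final branch hold, so *pba drops to 200 and cwnd grows to 5500.
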
*/ skb_trim(skb, sizeof(*sre) + elen); @@ -453,8 +450,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - ssf = (struct sctp_send_failed *) - skb_push(skb, sizeof(struct sctp_send_failed)); + ssf = skb_push(skb, sizeof(struct sctp_send_failed)); /* Socket Extensions for SCTP * 5.3.1.4 SCTP_SEND_FAILED @@ -549,8 +545,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( goto fail; skb = sctp_event2skb(event); - sse = (struct sctp_shutdown_event *) - skb_put(skb, sizeof(struct sctp_shutdown_event)); + sse = skb_put(skb, sizeof(struct sctp_shutdown_event)); /* Socket Extensions for SCTP * 5.3.1.5 SCTP_SHUTDOWN_EVENT @@ -612,8 +607,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication( goto fail; skb = sctp_event2skb(event); - sai = (struct sctp_adaptation_event *) - skb_put(skb, sizeof(struct sctp_adaptation_event)); + sai = skb_put(skb, sizeof(struct sctp_adaptation_event)); sai->sai_type = SCTP_ADAPTATION_INDICATION; sai->sai_flags = 0; @@ -751,8 +745,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( goto fail; skb = sctp_event2skb(event); - pd = (struct sctp_pdapi_event *) - skb_put(skb, sizeof(struct sctp_pdapi_event)); + pd = skb_put(skb, sizeof(struct sctp_pdapi_event)); /* pdapi_type * It should be SCTP_PARTIAL_DELIVERY_EVENT @@ -803,8 +796,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey( goto fail; skb = sctp_event2skb(event); - ak = (struct sctp_authkey_event *) - skb_put(skb, sizeof(struct sctp_authkey_event)); + ak = skb_put(skb, sizeof(struct sctp_authkey_event)); ak->auth_type = SCTP_AUTHENTICATION_EVENT; ak->auth_flags = 0; @@ -842,8 +834,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( return NULL; skb = sctp_event2skb(event); - sdry = (struct sctp_sender_dry_event *) - skb_put(skb, sizeof(struct sctp_sender_dry_event)); + sdry = skb_put(skb, sizeof(struct sctp_sender_dry_event)); sdry->sender_dry_type = SCTP_SENDER_DRY_EVENT; sdry->sender_dry_flags = 0; @@ -869,7 +860,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( return NULL; skb = sctp_event2skb(event); - sreset = (struct sctp_stream_reset_event *)skb_put(skb, length); + sreset = skb_put(skb, length); sreset->strreset_type = SCTP_STREAM_RESET_EVENT; sreset->strreset_flags = flags; @@ -897,8 +888,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( return NULL; skb = sctp_event2skb(event); - areset = (struct sctp_assoc_reset_event *) - skb_put(skb, sizeof(struct sctp_assoc_reset_event)); + areset = skb_put(skb, sizeof(struct sctp_assoc_reset_event)); areset->assocreset_type = SCTP_ASSOC_RESET_EVENT; areset->assocreset_flags = flags; @@ -925,8 +915,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( return NULL; skb = sctp_event2skb(event); - schange = (struct sctp_stream_change_event *) - skb_put(skb, sizeof(struct sctp_stream_change_event)); + schange = skb_put(skb, sizeof(struct sctp_stream_change_event)); schange->strchange_type = SCTP_STREAM_CHANGE_EVENT; schange->strchange_flags = flags; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index aa3624d50278..0225d62a869f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -764,7 +764,7 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, __u16 sid, csid, cssn; sid = event->stream; - stream = ulpq->asoc->stream; + stream = &ulpq->asoc->stream; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; @@ -858,7 +858,7 @@ static struct sctp_ulpevent 
*sctp_ulpq_order(struct sctp_ulpq *ulpq, /* Note: The stream ID must be verified before this routine. */ sid = event->stream; ssn = event->ssn; - stream = ulpq->asoc->stream; + stream = &ulpq->asoc->stream; /* Is this the expected SSN for this stream ID? */ if (ssn != sctp_ssn_peek(stream, in, sid)) { @@ -893,7 +893,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) struct sk_buff_head *lobby = &ulpq->lobby; __u16 csid, cssn; - stream = ulpq->asoc->stream; + stream = &ulpq->asoc->stream; /* We are holding the chunks by stream, by SSN. */ skb_queue_head_init(&temp); @@ -958,7 +958,7 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) struct sctp_stream *stream; /* Note: The stream ID must be verified before this routine. */ - stream = ulpq->asoc->stream; + stream = &ulpq->asoc->stream; /* Is this an old SSN? If so ignore. */ if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid))) @@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk) { needed = ntohs(chunk->chunk_hdr->length); - needed -= sizeof(sctp_data_chunk_t); + needed -= sizeof(struct sctp_data_chunk); } else needed = SCTP_DEFAULT_MAXWINDOW; diff --git a/net/socket.c b/net/socket.c index c2564eb25c6b..8f9dab330d57 100644 --- a/net/socket.c +++ b/net/socket.c @@ -461,7 +461,7 @@ EXPORT_SYMBOL(sock_from_file); * @err: pointer to an error code return * * The file handle passed in is locked and the socket it is bound - * too is returned. If an error occurs the err pointer is overwritten + * to is returned. If an error occurs the err pointer is overwritten * with a negative errno code and NULL is returned. The function checks * for both invalid handles and passing a handle which is not a socket. * @@ -662,6 +662,40 @@ static bool skb_is_err_queue(const struct sk_buff *skb) return skb->pkt_type == PACKET_OUTGOING; } +/* On transmit, software and hardware timestamps are returned independently. + * As the two skb clones share the hardware timestamp, which may be updated + * before the software timestamp is received, a hardware TX timestamp may be + * returned only if there is no software TX timestamp. Ignore false software + * timestamps, which may be made in the __sock_recv_timestamp() call when the + * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a + * hardware timestamp. + */ +static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) +{ + return skb->tstamp && !false_tstamp && skb_is_err_queue(skb); +} + +static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct scm_ts_pktinfo ts_pktinfo; + struct net_device *orig_dev; + + if (!skb_mac_header_was_set(skb)) + return; + + memset(&ts_pktinfo, 0, sizeof(ts_pktinfo)); + + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) + ts_pktinfo.if_index = orig_dev->ifindex; + rcu_read_unlock(); + + ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO, + sizeof(ts_pktinfo), &ts_pktinfo); +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -670,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); struct scm_timestamping tss; - int empty = 1; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. 
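put_ts_pktinfo() above backs the new SOF_TIMESTAMPING_OPT_PKTINFO option wired up in the following hunk: when a raw hardware timestamp is delivered, the receive path also emits an SCM_TIMESTAMPING_PKTINFO control message carrying the ingress ifindex (resolved from the skb's napi id) and the layer-2 packet length. A hedged consumer sketch, assuming struct scm_ts_pktinfo is exported via linux/net_tstamp.h as in this series:

	#include <stdio.h>
	#include <sys/socket.h>
	#include <linux/net_tstamp.h>

	static void dump_pktinfo(struct msghdr *msg)
	{
		struct cmsghdr *cm;

		for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_TIMESTAMPING_PKTINFO)
				continue;

			struct scm_ts_pktinfo *pi = (void *)CMSG_DATA(cm);

			printf("rx ifindex %u, l2 length %u\n",
			       pi->if_index, pi->pkt_length);
		}
	}
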
*/ - if (need_software_tstamp && skb->tstamp == 0) + if (need_software_tstamp && skb->tstamp == 0) { __net_timestamp(skb); + false_tstamp = 1; + } if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { @@ -699,8 +735,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) + !skb_is_swtx_tstamp(skb, false_tstamp) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; + if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && + !skb_is_err_queue(skb)) + put_ts_pktinfo(msg, skb); + } if (!empty) { put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(tss), &tss); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4f16953e4954..9463af4b32e8 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -117,14 +117,14 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) { - atomic_inc(&ctx->count); + refcount_inc(&ctx->count); return ctx; } static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { - if (atomic_dec_and_test(&ctx->count)) + if (refcount_dec_and_test(&ctx->count)) gss_free_ctx(ctx); } @@ -200,7 +200,7 @@ gss_alloc_context(void) ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ spin_lock_init(&ctx->gc_seq_lock); - atomic_set(&ctx->count,1); + refcount_set(&ctx->count,1); } return ctx; } @@ -287,7 +287,7 @@ err: #define UPCALL_BUF_LEN 128 struct gss_upcall_msg { - atomic_t count; + refcount_t count; kuid_t uid; struct rpc_pipe_msg msg; struct list_head list; @@ -328,7 +328,7 @@ static void gss_release_msg(struct gss_upcall_msg *gss_msg) { struct net *net = gss_msg->auth->net; - if (!atomic_dec_and_test(&gss_msg->count)) + if (!refcount_dec_and_test(&gss_msg->count)) return; put_pipe_version(net); BUG_ON(!list_empty(&gss_msg->list)); @@ -348,7 +348,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth continue; if (auth && pos->auth->service != auth->service) continue; - atomic_inc(&pos->count); + refcount_inc(&pos->count); dprintk("RPC: %s found msg %p\n", __func__, pos); return pos; } @@ -369,7 +369,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg) spin_lock(&pipe->lock); old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth); if (old == NULL) { - atomic_inc(&gss_msg->count); + refcount_inc(&gss_msg->count); list_add(&gss_msg->list, &pipe->in_downcall); } else gss_msg = old; @@ -383,7 +383,7 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg) list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); - atomic_dec(&gss_msg->count); + refcount_dec(&gss_msg->count); } static void @@ -506,7 +506,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); - atomic_set(&gss_msg->count, 1); + refcount_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; switch (vers) { @@ -542,11 +542,11 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { int res; - atomic_inc(&gss_msg->count); + refcount_inc(&gss_msg->count); res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); - 
atomic_dec(&gss_msg->count); + refcount_dec(&gss_msg->count); gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } @@ -595,7 +595,7 @@ gss_refresh_upcall(struct rpc_task *task) task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ - atomic_inc(&gss_msg->count); + refcount_inc(&gss_msg->count); rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else { gss_handle_downcall_result(gss_cred, gss_msg); @@ -815,7 +815,7 @@ restart: if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; - atomic_inc(&gss_msg->count); + refcount_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); gss_release_msg(gss_msg); @@ -834,7 +834,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno < 0) { dprintk("RPC: %s releasing msg %p\n", __func__, gss_msg); - atomic_inc(&gss_msg->count); + refcount_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) warn_gssd(); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8d40a7d31c99..25dc67ef9d37 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -571,24 +571,17 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, } EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); -static RAW_NOTIFIER_HEAD(switchdev_notif_chain); +static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); /** * register_switchdev_notifier - Register notifier * @nb: notifier_block * - * Register switch device notifier. This should be used by code - * which needs to monitor events happening in particular device. - * Return values are same as for atomic_notifier_chain_register(). + * Register switch device notifier. */ int register_switchdev_notifier(struct notifier_block *nb) { - int err; - - rtnl_lock(); - err = raw_notifier_chain_register(&switchdev_notif_chain, nb); - rtnl_unlock(); - return err; + return atomic_notifier_chain_register(&switchdev_notif_chain, nb); } EXPORT_SYMBOL_GPL(register_switchdev_notifier); @@ -597,16 +590,10 @@ EXPORT_SYMBOL_GPL(register_switchdev_notifier); * @nb: notifier_block * * Unregister switch device notifier. - * Return values are same as for atomic_notifier_chain_unregister(). */ int unregister_switchdev_notifier(struct notifier_block *nb) { - int err; - - rtnl_lock(); - err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); - rtnl_unlock(); - return err; + return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb); } EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); @@ -616,18 +603,13 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * @dev: port device * @info: notifier information data * - * Call all network notifier blocks. This should be called by driver - * when it needs to propagate hardware event. - * Return values are same as for atomic_notifier_call_chain(). - * rtnl_lock must be held. + * Call all network notifier blocks. 
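With the switchdev chain converted from a raw, rtnl-protected notifier to an atomic one in the hunks around this point, register_switchdev_notifier() and unregister_switchdev_notifier() no longer take rtnl_lock, and call_switchdev_notifiers() may be invoked from atomic context, so callbacks must not sleep. A sketch of a conforming listener (my_switchdev_event is illustrative):

	static int my_switchdev_event(struct notifier_block *nb,
				      unsigned long event, void *ptr)
	{
		struct switchdev_notifier_info *info = ptr;

		/* Runs under the atomic notifier chain: no sleeping and
		 * no blocking allocations; defer heavy work elsewhere. */
		pr_debug("switchdev event %lu on %s\n", event,
			 netdev_name(info->dev));
		return NOTIFY_DONE;
	}

	static struct notifier_block my_switchdev_nb = {
		.notifier_call = my_switchdev_event,
	};

	/* register_switchdev_notifier(&my_switchdev_nb) at probe time,
	 * unregister_switchdev_notifier(&my_switchdev_nb) on remove. */
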
*/ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info) { - ASSERT_RTNL(); - info->dev = dev; - return raw_notifier_call_chain(&switchdev_notif_chain, val, info); + return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1b92b72e812f..101e3597338f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } diff --git a/net/tls/Kconfig b/net/tls/Kconfig new file mode 100644 index 000000000000..eb583038c67e --- /dev/null +++ b/net/tls/Kconfig @@ -0,0 +1,15 @@ +# +# TLS configuration +# +config TLS + tristate "Transport Layer Security support" + depends on INET + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM + default n + ---help--- + Enable kernel support for TLS protocol. This allows symmetric + encryption handling of the TLS protocol to be done in-kernel. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile new file mode 100644 index 000000000000..a930fd1c4f7b --- /dev/null +++ b/net/tls/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the TLS subsystem. +# + +obj-$(CONFIG_TLS) += tls.o + +tls-y := tls_main.o tls_sw.o diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c new file mode 100644 index 000000000000..a03130a47b85 --- /dev/null +++ b/net/tls/tls_main.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> + +#include <net/tcp.h> +#include <net/inet_common.h> +#include <linux/highmem.h> +#include <linux/netdevice.h> +#include <linux/sched/signal.h> + +#include <net/tls.h> + +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_DESCRIPTION("Transport Layer Security Support"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct proto tls_base_prot; +static struct proto tls_sw_prot; + +int wait_on_pending_writer(struct sock *sk, long *timeo) +{ + int rc = 0; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { + if (!*timeo) { + rc = -EAGAIN; + break; + } + + if (signal_pending(current)) { + rc = sock_intr_errno(*timeo); + break; + } + + if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait)) + break; + } + remove_wait_queue(sk_sleep(sk), &wait); + return rc; +} + +int tls_push_sg(struct sock *sk, + struct tls_context *ctx, + struct scatterlist *sg, + u16 first_offset, + int flags) +{ + int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST; + int ret = 0; + struct page *p; + size_t size; + int offset = first_offset; + + size = sg->length - offset; + offset += sg->offset; + + while (1) { + if (sg_is_last(sg)) + sendpage_flags = flags; + + /* is sending application-limited? */ + tcp_rate_check_app_limited(sk); + p = sg_page(sg); +retry: + ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags); + + if (ret != size) { + if (ret > 0) { + offset += ret; + size -= ret; + goto retry; + } + + offset -= sg->offset; + ctx->partially_sent_offset = offset; + ctx->partially_sent_record = (void *)sg; + return ret; + } + + put_page(p); + sk_mem_uncharge(sk, sg->length); + sg = sg_next(sg); + if (!sg) + break; + + offset = sg->offset; + size = sg->length; + } + + clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + + return 0; +} + +static int tls_handle_open_record(struct sock *sk, int flags) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (tls_is_pending_open_record(ctx)) + return ctx->push_pending_record(sk, flags); + + return 0; +} + +int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type) +{ + struct cmsghdr *cmsg; + int rc = -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_TLS) + continue; + + switch (cmsg->cmsg_type) { + case TLS_SET_RECORD_TYPE: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) + return -EINVAL; + + if (msg->msg_flags & MSG_MORE) + return -EINVAL; + + rc = tls_handle_open_record(sk, msg->msg_flags); + if (rc) + return rc; + + *record_type = *(unsigned char *)CMSG_DATA(cmsg); + rc = 0; + break; + default: + return -EINVAL; + } + } + + return rc; +} + +int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo) +{ + struct scatterlist *sg; + u16 offset; + + if (!tls_is_partially_sent_record(ctx)) + return ctx->push_pending_record(sk, flags); + + sg = ctx->partially_sent_record; + offset = ctx->partially_sent_offset; + + ctx->partially_sent_record = NULL; + return tls_push_sg(sk, ctx, sg, offset, flags); +} + +static void tls_write_space(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { + gfp_t sk_allocation = sk->sk_allocation; + int rc; + long timeo = 0; + + sk->sk_allocation = GFP_ATOMIC; + rc = tls_push_pending_closed_record(sk, ctx, + MSG_DONTWAIT | + MSG_NOSIGNAL, + &timeo); + sk->sk_allocation = sk_allocation; + + if (rc < 0) + return; + } + + 
ctx->sk_write_space(sk); +} + +static void tls_sk_proto_close(struct sock *sk, long timeout) +{ + struct tls_context *ctx = tls_get_ctx(sk); + long timeo = sock_sndtimeo(sk, 0); + void (*sk_proto_close)(struct sock *sk, long timeout); + + lock_sock(sk); + + if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + tls_handle_open_record(sk, 0); + + if (ctx->partially_sent_record) { + struct scatterlist *sg = ctx->partially_sent_record; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + } + ctx->free_resources(sk); + kfree(ctx->rec_seq); + kfree(ctx->iv); + + sk_proto_close = ctx->sk_proto_close; + kfree(ctx); + + release_sock(sk); + sk_proto_close(sk, timeout); +} + +static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, + int __user *optlen) +{ + int rc = 0; + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_crypto_info *crypto_info; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (!optval || (len < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + if (!ctx) { + rc = -EBUSY; + goto out; + } + + /* get user crypto info */ + crypto_info = &ctx->crypto_send; + + if (!TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; + goto out; + } + + if (len == sizeof(crypto_info)) { + if (copy_to_user(optval, crypto_info, sizeof(*crypto_info))) + rc = -EFAULT; + goto out; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 * + crypto_info_aes_gcm_128 = + container_of(crypto_info, + struct tls12_crypto_info_aes_gcm_128, + info); + + if (len != sizeof(*crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(crypto_info_aes_gcm_128->iv, ctx->iv, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + release_sock(sk); + if (copy_to_user(optval, + crypto_info_aes_gcm_128, + sizeof(*crypto_info_aes_gcm_128))) + rc = -EFAULT; + break; + } + default: + rc = -EINVAL; + } + +out: + return rc; +} + +static int do_tls_getsockopt(struct sock *sk, int optname, + char __user *optval, int __user *optlen) +{ + int rc = 0; + + switch (optname) { + case TLS_TX: + rc = do_tls_getsockopt_tx(sk, optval, optlen); + break; + default: + rc = -ENOPROTOOPT; + break; + } + return rc; +} + +static int tls_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->getsockopt(sk, level, optname, optval, optlen); + + return do_tls_getsockopt(sk, optname, optval, optlen); +} + +static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct tls_context *ctx = tls_get_ctx(sk); + struct proto *prot = NULL; + int rc = 0; + + if (!optval || (optlen < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto out; + } + + /* check version */ + if (tmp_crypto_info.version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; + goto out; + } + + /* get user crypto info */ + crypto_info = &ctx->crypto_send; + + /* Setting the crypto info more than once is currently not supported */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) + goto out; + + switch (tmp_crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto out; + } + rc = copy_from_user( + crypto_info, +
optval, + sizeof(struct tls12_crypto_info_aes_gcm_128)); + + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + break; + } + default: + rc = -EINVAL; + goto out; + } + + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; + + ctx->sk_proto_close = sk->sk_prot->close; + + /* currently SW is default, we will have ethtool in future */ + rc = tls_set_sw_offload(sk, ctx); + prot = &tls_sw_prot; + if (rc) + goto err_crypto_info; + + sk->sk_prot = prot; + goto out; + +err_crypto_info: + memset(crypto_info, 0, sizeof(*crypto_info)); +out: + return rc; +} + +static int do_tls_setsockopt(struct sock *sk, int optname, + char __user *optval, unsigned int optlen) +{ + int rc = 0; + + switch (optname) { + case TLS_TX: + lock_sock(sk); + rc = do_tls_setsockopt_tx(sk, optval, optlen); + release_sock(sk); + break; + default: + rc = -ENOPROTOOPT; + break; + } + return rc; +} + +static int tls_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->setsockopt(sk, level, optname, optval, optlen); + + return do_tls_setsockopt(sk, optname, optval, optlen); +} + +static int tls_init(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tls_context *ctx; + int rc = 0; + + /* allocate tls context */ + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto out; + } + icsk->icsk_ulp_data = ctx; + ctx->setsockopt = sk->sk_prot->setsockopt; + ctx->getsockopt = sk->sk_prot->getsockopt; + sk->sk_prot = &tls_base_prot; +out: + return rc; +} + +static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { + .name = "tls", + .owner = THIS_MODULE, + .init = tls_init, +}; + +static int __init tls_register(void) +{ + tls_base_prot = tcp_prot; + tls_base_prot.setsockopt = tls_setsockopt; + tls_base_prot.getsockopt = tls_getsockopt; + + tls_sw_prot = tls_base_prot; + tls_sw_prot.sendmsg = tls_sw_sendmsg; + tls_sw_prot.sendpage = tls_sw_sendpage; + tls_sw_prot.close = tls_sk_proto_close; + + tcp_register_ulp(&tcp_tls_ulp_ops); + + return 0; +} + +static void __exit tls_unregister(void) +{ + tcp_unregister_ulp(&tcp_tls_ulp_ops); +} + +module_init(tls_register); +module_exit(tls_unregister); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c new file mode 100644 index 000000000000..fa596fa71ba7 --- /dev/null +++ b/net/tls/tls_sw.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved. + * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved. + * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
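That completes the tls_main.c plumbing: tls_register() installs the "tls" ULP plus the SOL_TLS get/setsockopt hooks, and do_tls_setsockopt_tx() swaps the socket over to tls_sw_prot. The intended userspace flow — finish the handshake as usual, then attach the ULP and push the negotiated AES-GCM-128 transmit keys — would look roughly like the sketch below. Key-material parameter names are placeholders; TCP_ULP (31) and SOL_TLS (282) are the values added by this series and may need local definitions on older headers:

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/tcp.h>
    #include <linux/tls.h>

    #ifndef TCP_ULP
    #define TCP_ULP 31
    #endif
    #ifndef SOL_TLS
    #define SOL_TLS 282
    #endif

    /* sock: a connected TCP socket whose TLS 1.2 handshake was done in
     * userspace; key/iv/salt/seq are the extracted TX secrets (placeholders). */
    static int ktls_enable_tx(int sock,
                              const unsigned char *key,  /* 16 bytes */
                              const unsigned char *iv,   /* 8-byte explicit nonce */
                              const unsigned char *salt, /* 4-byte implicit part */
                              const unsigned char *seq)  /* 8-byte record seq */
    {
            struct tls12_crypto_info_aes_gcm_128 ci = { 0 };

            if (setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")))
                    return -1;          /* tls_init() above takes over sk_prot */

            ci.info.version = TLS_1_2_VERSION;
            ci.info.cipher_type = TLS_CIPHER_AES_GCM_128;
            memcpy(ci.key, key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
            memcpy(ci.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
            memcpy(ci.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
            memcpy(ci.rec_seq, seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);

            /* routed to do_tls_setsockopt_tx() above */
            return setsockopt(sock, SOL_TLS, TLS_TX, &ci, sizeof(ci));
    }

After this returns 0, ordinary write()/send() and sendfile() on the socket are encrypted in-kernel by tls_sw_sendmsg() and tls_sw_sendpage() in tls_sw.c below.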
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <crypto/aead.h> + +#include <net/tls.h> + +static inline void tls_make_aad(int recv, + char *buf, + size_t size, + char *record_sequence, + int record_sequence_size, + unsigned char record_type) +{ + memcpy(buf, record_sequence, record_sequence_size); + + buf[8] = record_type; + buf[9] = TLS_1_2_VERSION_MAJOR; + buf[10] = TLS_1_2_VERSION_MINOR; + buf[11] = size >> 8; + buf[12] = size & 0xFF; +} + +static void trim_sg(struct sock *sk, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size, int target_size) +{ + int i = *sg_num_elem - 1; + int trim = *sg_size - target_size; + + if (trim <= 0) { + WARN_ON(trim < 0); + return; + } + + *sg_size = target_size; + while (trim >= sg[i].length) { + trim -= sg[i].length; + sk_mem_uncharge(sk, sg[i].length); + put_page(sg_page(&sg[i])); + i--; + + if (i < 0) + goto out; + } + + sg[i].length -= trim; + sk_mem_uncharge(sk, trim); + +out: + *sg_num_elem = i + 1; +} + +static void trim_both_sgl(struct sock *sk, int target_size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + + trim_sg(sk, ctx->sg_plaintext_data, + &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + target_size); + + if (target_size > 0) + target_size += tls_ctx->overhead_size; + + trim_sg(sk, ctx->sg_encrypted_data, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, + target_size); +} + +static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size, + int first_coalesce) +{ + struct page_frag *pfrag; + unsigned int size = *sg_size; + int num_elem = *sg_num_elem, use = 0, rc = 0; + struct scatterlist *sge; + unsigned int orig_offset; + + len -= size; + pfrag = sk_page_frag(sk); + + while (len > 0) { + if (!sk_page_frag_refill(sk, pfrag)) { + rc = -ENOMEM; + goto out; + } + + use = min_t(int, len, pfrag->size - pfrag->offset); + + if (!sk_wmem_schedule(sk, use)) { + rc = -ENOMEM; + goto out; + } + + sk_mem_charge(sk, use); + size += use; + orig_offset = pfrag->offset; + pfrag->offset += use; + + sge = sg + num_elem - 1; + if (num_elem > first_coalesce && sg_page(sg) == pfrag->page && + sg->offset + sg->length == orig_offset) { + sg->length += use; + } else { + sge++; + sg_unmark_end(sge); + sg_set_page(sge, pfrag->page, use, orig_offset); + get_page(pfrag->page); + ++num_elem; + if (num_elem == MAX_SKB_FRAGS) { + rc = -ENOSPC; + break; + } + } + + len -= use; + } + goto out; + +out: + *sg_size = size; + *sg_num_elem = num_elem; + return rc; +} + +static int alloc_encrypted_sg(struct sock *sk, int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + int rc = 0; + + rc = alloc_sg(sk, len, ctx->sg_encrypted_data, + &ctx->sg_encrypted_num_elem, 
&ctx->sg_encrypted_size, 0); + + return rc; +} + +static int alloc_plaintext_sg(struct sock *sk, int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + int rc = 0; + + rc = alloc_sg(sk, len, ctx->sg_plaintext_data, + &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, + tls_ctx->pending_open_record_frags); + + return rc; +} + +static void free_sg(struct sock *sk, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size) +{ + int i, n = *sg_num_elem; + + for (i = 0; i < n; ++i) { + sk_mem_uncharge(sk, sg[i].length); + put_page(sg_page(&sg[i])); + } + *sg_num_elem = 0; + *sg_size = 0; +} + +static void tls_free_both_sg(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + + free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size); + + free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size); +} + +static int tls_do_encryption(struct tls_context *tls_ctx, + struct tls_sw_context *ctx, size_t data_len, + gfp_t flags) +{ + unsigned int req_size = sizeof(struct aead_request) + + crypto_aead_reqsize(ctx->aead_send); + struct aead_request *aead_req; + int rc; + + aead_req = kmalloc(req_size, flags); + if (!aead_req) + return -ENOMEM; + + ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size; + ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size; + + aead_request_set_tfm(aead_req, ctx->aead_send); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out, + data_len, tls_ctx->iv); + rc = crypto_aead_encrypt(aead_req); + + ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size; + ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size; + + kfree(aead_req); + return rc; +} + +static int tls_push_record(struct sock *sk, int flags, + unsigned char record_type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + int rc; + + sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); + sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); + + tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size, + tls_ctx->rec_seq, tls_ctx->rec_seq_size, + record_type); + + tls_fill_prepend(tls_ctx, + page_address(sg_page(&ctx->sg_encrypted_data[0])) + + ctx->sg_encrypted_data[0].offset, + ctx->sg_plaintext_size, record_type); + + tls_ctx->pending_open_record_frags = 0; + set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags); + + rc = tls_do_encryption(tls_ctx, ctx, ctx->sg_plaintext_size, + sk->sk_allocation); + if (rc < 0) { + /* If we are called from write_space and + * we fail, we need to set this SOCK_NOSPACE + * to trigger another write_space in the future. 
+ */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + return rc; + } + + free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size); + + ctx->sg_encrypted_num_elem = 0; + ctx->sg_encrypted_size = 0; + + /* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */ + rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags); + if (rc < 0 && rc != -EAGAIN) + tls_err_abort(sk); + + tls_advance_record_sn(sk, tls_ctx); + return rc; +} + +static int tls_sw_push_pending_record(struct sock *sk, int flags) +{ + return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA); +} + +static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from, + int length) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct page *pages[MAX_SKB_FRAGS]; + + size_t offset; + ssize_t copied, use; + int i = 0; + unsigned int size = ctx->sg_plaintext_size; + int num_elem = ctx->sg_plaintext_num_elem; + int rc = 0; + int maxpages; + + while (length > 0) { + i = 0; + maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem; + if (maxpages == 0) { + rc = -EFAULT; + goto out; + } + copied = iov_iter_get_pages(from, pages, + length, + maxpages, &offset); + if (copied <= 0) { + rc = -EFAULT; + goto out; + } + + iov_iter_advance(from, copied); + + length -= copied; + size += copied; + while (copied) { + use = min_t(int, copied, PAGE_SIZE - offset); + + sg_set_page(&ctx->sg_plaintext_data[num_elem], + pages[i], use, offset); + sg_unmark_end(&ctx->sg_plaintext_data[num_elem]); + sk_mem_charge(sk, use); + + offset = 0; + copied -= use; + + ++i; + ++num_elem; + } + } + +out: + ctx->sg_plaintext_size = size; + ctx->sg_plaintext_num_elem = num_elem; + return rc; +} + +static int memcopy_from_iter(struct sock *sk, struct iov_iter *from, + int bytes) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct scatterlist *sg = ctx->sg_plaintext_data; + int copy, i, rc = 0; + + for (i = tls_ctx->pending_open_record_frags; + i < ctx->sg_plaintext_num_elem; ++i) { + copy = sg[i].length; + if (copy_from_iter( + page_address(sg_page(&sg[i])) + sg[i].offset, + copy, from) != copy) { + rc = -EFAULT; + goto out; + } + bytes -= copy; + + ++tls_ctx->pending_open_record_frags; + + if (!bytes) + break; + } + +out: + return rc; +} + +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + int ret = 0; + int required_size; + long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + bool eor = !(msg->msg_flags & MSG_MORE); + size_t try_to_copy, copied = 0; + unsigned char record_type = TLS_RECORD_TYPE_DATA; + int record_room; + bool full_record; + int orig_size; + + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + return -ENOTSUPP; + + lock_sock(sk); + + if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo)) + goto send_end; + + if (unlikely(msg->msg_controllen)) { + ret = tls_proccess_cmsg(sk, msg, &record_type); + if (ret) + goto send_end; + } + + while (msg_data_left(msg)) { + if (sk->sk_err) { + ret = sk->sk_err; + goto send_end; + } + + orig_size = ctx->sg_plaintext_size; + full_record = false; + try_to_copy = msg_data_left(msg); + record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size; + if (try_to_copy >= record_room) { + try_to_copy = record_room; + full_record = true; + } + + required_size = ctx->sg_plaintext_size + try_to_copy + + 
tls_ctx->overhead_size; + + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +alloc_encrypted: + ret = alloc_encrypted_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust try_to_copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + try_to_copy -= required_size - ctx->sg_encrypted_size; + full_record = true; + } + + if (full_record || eor) { + ret = zerocopy_from_iter(sk, &msg->msg_iter, + try_to_copy); + if (ret) + goto fallback_to_reg_send; + + copied += try_to_copy; + ret = tls_push_record(sk, msg->msg_flags, record_type); + if (!ret) + continue; + if (ret == -EAGAIN) + goto send_end; + + copied -= try_to_copy; +fallback_to_reg_send: + iov_iter_revert(&msg->msg_iter, + ctx->sg_plaintext_size - orig_size); + trim_sg(sk, ctx->sg_plaintext_data, + &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + orig_size); + } + + required_size = ctx->sg_plaintext_size + try_to_copy; +alloc_plaintext: + ret = alloc_plaintext_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust try_to_copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + try_to_copy -= required_size - ctx->sg_plaintext_size; + full_record = true; + + trim_sg(sk, ctx->sg_encrypted_data, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, + ctx->sg_plaintext_size + + tls_ctx->overhead_size); + } + + ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy); + if (ret) + goto trim_sgl; + + copied += try_to_copy; + if (full_record || eor) { +push_record: + ret = tls_push_record(sk, msg->msg_flags, record_type); + if (ret) { + if (ret == -ENOMEM) + goto wait_for_memory; + + goto send_end; + } + } + + continue; + +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + ret = sk_stream_wait_memory(sk, &timeo); + if (ret) { +trim_sgl: + trim_both_sgl(sk, orig_size); + goto send_end; + } + + if (tls_is_pending_closed_record(tls_ctx)) + goto push_record; + + if (ctx->sg_encrypted_size < required_size) + goto alloc_encrypted; + + goto alloc_plaintext; + } + +send_end: + ret = sk_stream_error(sk, msg->msg_flags, ret); + + release_sock(sk); + return copied ? copied : ret; +} + +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + int ret = 0; + long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + bool eor; + size_t orig_size = size; + unsigned char record_type = TLS_RECORD_TYPE_DATA; + struct scatterlist *sg; + bool full_record; + int record_room; + + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST)) + return -ENOTSUPP; + + /* No MSG_EOR from splice, only look at MSG_MORE */ + eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); + + lock_sock(sk); + + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo)) + goto sendpage_end; + + /* Call the sk_stream functions to manage the sndbuf mem. 
*/ + while (size > 0) { + size_t copy, required_size; + + if (sk->sk_err) { + ret = sk->sk_err; + goto sendpage_end; + } + + full_record = false; + record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size; + copy = size; + if (copy >= record_room) { + copy = record_room; + full_record = true; + } + required_size = ctx->sg_plaintext_size + copy + + tls_ctx->overhead_size; + + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +alloc_payload: + ret = alloc_encrypted_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + copy -= required_size - ctx->sg_plaintext_size; + full_record = true; + } + + get_page(page); + sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; + sg_set_page(sg, page, copy, offset); + ctx->sg_plaintext_num_elem++; + + sk_mem_charge(sk, copy); + offset += copy; + size -= copy; + ctx->sg_plaintext_size += copy; + tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem; + + if (full_record || eor || + ctx->sg_plaintext_num_elem == + ARRAY_SIZE(ctx->sg_plaintext_data)) { +push_record: + ret = tls_push_record(sk, flags, record_type); + if (ret) { + if (ret == -ENOMEM) + goto wait_for_memory; + + goto sendpage_end; + } + } + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + ret = sk_stream_wait_memory(sk, &timeo); + if (ret) { + trim_both_sgl(sk, ctx->sg_plaintext_size); + goto sendpage_end; + } + + if (tls_is_pending_closed_record(tls_ctx)) + goto push_record; + + goto alloc_payload; + } + +sendpage_end: + if (orig_size > size) + ret = orig_size - size; + else + ret = sk_stream_error(sk, flags, ret); + + release_sock(sk); + return ret; +} + +void tls_sw_free_resources(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + + if (ctx->aead_send) + crypto_free_aead(ctx->aead_send); + + tls_free_both_sg(sk); + + kfree(ctx); +} + +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) +{ + char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + struct tls_crypto_info *crypto_info; + struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; + struct tls_sw_context *sw_ctx; + u16 nonce_size, tag_size, iv_size, rec_seq_size; + char *iv, *rec_seq; + int rc = 0; + + if (!ctx) { + rc = -EINVAL; + goto out; + } + + if (ctx->priv_ctx) { + rc = -EEXIST; + goto out; + } + + sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL); + if (!sw_ctx) { + rc = -ENOMEM; + goto out; + } + + ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; + ctx->free_resources = tls_sw_free_resources; + + crypto_info = &ctx->crypto_send; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; + gcm_128_info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + } + default: + rc = -EINVAL; + goto out; + } + + ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; + ctx->tag_size = tag_size; + ctx->overhead_size = ctx->prepend_size + ctx->tag_size; + ctx->iv_size = iv_size; + ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + GFP_KERNEL); + if (!ctx->iv) { + rc = 
-ENOMEM; + goto out; + } + memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + ctx->rec_seq_size = rec_seq_size; + ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL); + if (!ctx->rec_seq) { + rc = -ENOMEM; + goto free_iv; + } + memcpy(ctx->rec_seq, rec_seq, rec_seq_size); + + sg_init_table(sw_ctx->sg_encrypted_data, + ARRAY_SIZE(sw_ctx->sg_encrypted_data)); + sg_init_table(sw_ctx->sg_plaintext_data, + ARRAY_SIZE(sw_ctx->sg_plaintext_data)); + + sg_init_table(sw_ctx->sg_aead_in, 2); + sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space, + sizeof(sw_ctx->aad_space)); + sg_unmark_end(&sw_ctx->sg_aead_in[1]); + sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data); + sg_init_table(sw_ctx->sg_aead_out, 2); + sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space, + sizeof(sw_ctx->aad_space)); + sg_unmark_end(&sw_ctx->sg_aead_out[1]); + sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data); + + if (!sw_ctx->aead_send) { + sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0); + if (IS_ERR(sw_ctx->aead_send)) { + rc = PTR_ERR(sw_ctx->aead_send); + sw_ctx->aead_send = NULL; + goto free_rec_seq; + } + } + + ctx->push_pending_record = tls_sw_push_pending_record; + + memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + + rc = crypto_aead_setkey(sw_ctx->aead_send, keyval, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + goto free_aead; + + rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); + if (!rc) + goto out; + +free_aead: + crypto_free_aead(sw_ctx->aead_send); + sw_ctx->aead_send = NULL; +free_rec_seq: + kfree(ctx->rec_seq); + ctx->rec_seq = NULL; +free_iv: + kfree(ctx->iv); + ctx->iv = NULL; +out: + return rc; +} diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c77ced0109b7..7b52a380d710 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { - if (atomic_dec_and_test(&addr->refcnt)) + if (refcount_dec_and_test(&addr->refcnt)) kfree(addr); } @@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && - (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) @@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { @@ -864,7 +864,7 @@ static int unix_autobind(struct socket *sock) goto out; addr->name->sun_family = AF_UNIX; - atomic_set(&addr->refcnt, 1); + refcount_set(&addr->refcnt, 1); retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); @@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; - atomic_set(&addr->refcnt, 1); + refcount_set(&addr->refcnt, 1); if (sun_path[0]) { addr->hash = UNIX_HASH_SIZE; @@ -1335,7 +1335,7 @@ restart: /* copy address information from listening to new sock*/ if (otheru->addr) { - atomic_inc(&otheru->addr->refcnt); + refcount_inc(&otheru->addr->refcnt); newu->addr = otheru->addr; } if 
(otheru->path.dentry) { @@ -2033,7 +2033,7 @@ alloc_skb: skb->len += size; skb->data_len += size; skb->truesize += size; - atomic_add(size, &sk->sk_wmem_alloc); + refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { err = unix_scm_to_skb(&scm, skb, false); @@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), 0, s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->sk_type, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 18e24793659f..edba7ab97563 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -90,7 +90,6 @@ out_pkt: static struct sk_buff *virtio_transport_build_skb(void *opaque) { struct virtio_vsock_pkt *pkt = opaque; - unsigned char *t_hdr, *payload; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; @@ -99,7 +98,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) if (!skb) return NULL; - hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); /* pkt->hdr is little-endian so no need to byteswap here */ hdr->src_cid = pkt->hdr.src_cid; @@ -132,12 +131,10 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) break; } - t_hdr = skb_put(skb, sizeof(pkt->hdr)); - memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr)); + skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); if (pkt->len) { - payload = skb_put(skb, pkt->len); - memcpy(payload, pkt->buf, pkt->len); + skb_put_data(skb, pkt->buf, pkt->len); } return skb; diff --git a/net/wireless/core.c b/net/wireless/core.c index 83ea164f16b3..7b33e8c366bc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -711,6 +711,11 @@ int wiphy_register(struct wiphy *wiphy) (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2)))) return -EINVAL; + if (WARN_ON(wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X) && + (!rdev->ops->set_pmk || !rdev->ops->del_pmk))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index ec0b1c20ac99..421a6b80ec62 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -174,6 +174,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, scan_width); } + err = cfg80211_chandef_dfs_required(&rdev->wiphy, + &setup->chandef, + NL80211_IFTYPE_MESH_POINT); + if (err < 0) + return err; + if (err > 0 && !setup->userspace_handles_dfs) + return -EINVAL; + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef, NL80211_IFTYPE_MESH_POINT)) return -EINVAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c3bc9da30cff..45ba3d0872cc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7501,6 +7501,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1]; int err; bool need_new_beacon = false; + bool need_handle_dfs_flag = true; int len, i; u32 cs_count; @@ -7512,6 +7513,12 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: need_new_beacon = true; + /* For all modes except AP the handle_dfs flag needs to be + * supplied to tell the kernel that userspace will handle radar + * events when they happen. 
Otherwise a switch to a channel + * requiring DFS will be rejected. + */ + need_handle_dfs_flag = false; /* useless if AP is not running */ if (!wdev->beacon_interval) @@ -7634,8 +7641,13 @@ skip_beacons: if (err < 0) return err; - if (err > 0) + if (err > 0) { params.radar_required = true; + if (need_handle_dfs_flag && + !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) { + return -EINVAL; + } + } if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; @@ -8156,6 +8168,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, memcpy(settings->akm_suites, data, len); } + if (info->attrs[NL80211_ATTR_PMK]) { + if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN) + return -EINVAL; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK)) + return -EINVAL; + settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]); + } + return 0; } @@ -8860,6 +8881,12 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; + if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) + return -EINVAL; + connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS]; + err = nl80211_crypto_settings(rdev, info, &connect.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) @@ -9962,6 +9989,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + setup.userspace_handles_dfs = + nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } @@ -11176,10 +11206,6 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (!info->attrs[NL80211_ATTR_NAN_FUNC]) return -EINVAL; - if (wdev->owner_nlportid && - wdev->owner_nlportid != info->snd_portid) - return -ENOTCONN; - err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, info->attrs[NL80211_ATTR_NAN_FUNC], nl80211_nan_func_policy, info->extack); @@ -11411,10 +11437,6 @@ static int nl80211_nan_del_func(struct sk_buff *skb, if (!info->attrs[NL80211_ATTR_COOKIE]) return -EINVAL; - if (wdev->owner_nlportid && - wdev->owner_nlportid != info->snd_portid) - return -ENOTCONN; - cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); rdev_del_nan_func(rdev, wdev, cookie); @@ -12241,6 +12263,90 @@ static int nl80211_set_multicast_to_unicast(struct sk_buff *skb, return rdev_set_multicast_to_unicast(rdev, dev, enabled); } +static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_pmk_conf pmk_conf = {}; + int ret; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK]) + return -EINVAL; + + wdev_lock(wdev); + if (!wdev->current_bss) { + ret = -ENOTCONN; + goto out; + } + + pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); + if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) { + ret = -EINVAL; + goto out; + } + + pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); + pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); + if (pmk_conf.pmk_len != WLAN_PMK_LEN && + pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) { + 
ret = -EINVAL; + goto out; + } + + if (info->attrs[NL80211_ATTR_PMKR0_NAME]) { + int r0_name_len = nla_len(info->attrs[NL80211_ATTR_PMKR0_NAME]); + + if (r0_name_len != WLAN_PMK_NAME_LEN) { + ret = -EINVAL; + goto out; + } + + pmk_conf.pmk_r0_name = + nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]); + } + + ret = rdev_set_pmk(rdev, dev, &pmk_conf); +out: + wdev_unlock(wdev); + return ret; +} + +static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *aa; + int ret; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + wdev_lock(wdev); + aa = nla_data(info->attrs[NL80211_ATTR_MAC]); + ret = rdev_del_pmk(rdev, dev, aa); + wdev_unlock(wdev); + + return ret; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -13116,6 +13222,21 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_PMK, + .doit = nl80211_set_pmk, + .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_PMK, + .doit = nl80211_del_pmk, + .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + }; static struct genl_family nl80211_fam __ro_after_init = { @@ -13671,7 +13792,9 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, info->req_ie)) || (info->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, - info->resp_ie))) + info->resp_ie)) || + (info->authorized && + nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED))) goto nla_put_failure; genlmsg_end(msg, hdr); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 0598c1e5d0ad..ce23d7d49960 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1164,4 +1164,29 @@ rdev_set_coalesce(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int rdev_set_pmk(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_pmk_conf *pmk_conf) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_set_pmk(&rdev->wiphy, dev, pmk_conf); + if (rdev->ops->set_pmk) + ret = rdev->ops->set_pmk(&rdev->wiphy, dev, pmk_conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *aa) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_del_pmk(&rdev->wiphy, dev, aa); + if (rdev->ops->del_pmk) + ret = rdev->ops->del_pmk(&rdev->wiphy, dev, aa); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 532a0007ce82..0a49b88070d0 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -960,6 +960,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, ev->rm.resp_ie_len = info->resp_ie_len; memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); ev->rm.bss = info->bss; + ev->rm.authorized = info->authorized; 
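rdev_set_pmk()/rdev_del_pmk() above dispatch to two new cfg80211_ops entries, and wiphy_register() now refuses the 4WAY_HANDSHAKE_STA_1X feature flag unless both are provided. A driver wanting to offload the 802.1X 4-way handshake would therefore wire up something like the sketch below (the mydrv_* names are hypothetical; the op signatures follow the calls made by the rdev-ops wrappers in this patch):

    #include <net/cfg80211.h>

    static int mydrv_set_pmk(struct wiphy *wiphy, struct net_device *dev,
                             const struct cfg80211_pmk_conf *conf)
    {
            /* program conf->pmk (conf->pmk_len bytes) for the authenticator
             * address conf->aa into the firmware; conf->pmk_r0_name, when
             * non-NULL, carries the FT PMK-R0 name */
            return 0;
    }

    static int mydrv_del_pmk(struct wiphy *wiphy, struct net_device *dev,
                             const u8 *aa)
    {
            /* drop the PMK installed for authenticator address @aa */
            return 0;
    }

    static const struct cfg80211_ops mydrv_cfg_ops = {
            /* ... existing ops ... */
            .set_pmk = mydrv_set_pmk,
            .del_pmk = mydrv_del_pmk,
    };

    static void mydrv_setup_wiphy(struct wiphy *wiphy)
    {
            /* advertise the offload; checked against the ops above in
             * wiphy_register() */
            wiphy_ext_feature_set(wiphy,
                                  NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X);
    }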
spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ca8b2059f92c..0f8db41eaddb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2258,6 +2258,66 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr)) ); +TRACE_EVENT(rdev_set_pmk, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_pmk_conf *pmk_conf), + + TP_ARGS(wiphy, netdev, pmk_conf), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(aa) + __field(u8, pmk_len) + __field(u8, pmk_r0_name_len) + __dynamic_array(u8, pmk, pmk_conf->pmk_len) + __dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(aa, pmk_conf->aa); + __entry->pmk_len = pmk_conf->pmk_len; + __entry->pmk_r0_name_len = + pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0; + memcpy(__get_dynamic_array(pmk), pmk_conf->pmk, + pmk_conf->pmk_len); + memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name, + pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0); + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT + "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG, + NETDEV_PR_ARG, MAC_PR_ARG(aa), __entry->pmk_len, + __print_array(__get_dynamic_array(pmk), + __get_dynamic_array_len(pmk), 1), + __entry->pmk_r0_name_len ? + __print_array(__get_dynamic_array(pmk_r0_name), + __get_dynamic_array_len(pmk_r0_name), 1) : "") +); + +TRACE_EVENT(rdev_del_pmk, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa), + + TP_ARGS(wiphy, netdev, aa), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(aa) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(aa, aa); + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa)) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ diff --git a/net/wireless/util.c b/net/wireless/util.c index 4992f1025c9d..bcb1284c3415 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -522,7 +522,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, pskb_pull(skb, hdrlen); if (!ehdr) - ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); + ehdr = skb_push(skb, sizeof(struct ethhdr)); memcpy(ehdr, &tmp, sizeof(tmp)); return 0; @@ -1219,8 +1219,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) u32 bitrate; int idx; - if (WARN_ON_ONCE(rate->mcs > 9)) - return 0; + if (rate->mcs > 9) + goto warn; switch (rate->bw) { case RATE_INFO_BW_160: @@ -1235,8 +1235,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) case RATE_INFO_BW_5: case RATE_INFO_BW_10: default: - WARN_ON(1); - /* fall through */ + goto warn; case RATE_INFO_BW_20: idx = 0; } @@ -1249,6 +1248,10 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) /* do NOT round down here */ return (bitrate + 50000) / 100000; + warn: + WARN_ONCE(1, "invalid rate bw=%d, mcs=%d, nss=%d\n", + rate->bw, rate->mcs, rate->nss); + return 0; } u32 cfg80211_calculate_bitrate(struct rate_info *rate) diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index bcaa180d6a3f..e0cd04d28352 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -266,7 +266,7 @@ void x25_link_device_up(struct net_device *dev) X25_MASK_PACKET_SIZE | X25_MASK_WINDOW_SIZE; 
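The x25_link and x25_route hunks here, like the tipc, af_unix, sec_path, xfrm_policy and xfrm_state hunks elsewhere in this section, are part of the refcount_t conversions mentioned in the pull text: object reference counters move from atomic_t to refcount_t, which saturates and WARNs on overflow/underflow instead of silently wrapping, turning potential use-after-free bugs into loud warnings. The conversion is mechanical; a sketch of the before/after pattern on a hypothetical object:

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct my_node {                            /* hypothetical object */
            refcount_t refcnt;                  /* was: atomic_t refcnt; */
    };

    static struct my_node *my_node_alloc(void)
    {
            struct my_node *nb = kzalloc(sizeof(*nb), GFP_KERNEL);

            if (nb)
                    refcount_set(&nb->refcnt, 1);       /* was: atomic_set() */
            return nb;
    }

    static void my_node_hold(struct my_node *nb)
    {
            refcount_inc(&nb->refcnt);                  /* was: atomic_inc() */
    }

    static void my_node_put(struct my_node *nb)
    {
            /* was: atomic_dec_and_test(); the refcount variant saturates
             * and warns instead of wrapping past zero */
            if (refcount_dec_and_test(&nb->refcnt))
                    kfree(nb);
    }

Lookup paths that may race with the last put use refcount_inc_not_zero(), as in xfrm_pol_hold_rcu() and xfrm_state_hold_rcu() below.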
nb->t20 = sysctl_x25_restart_request_timeout; - atomic_set(&nb->refcnt, 1); + refcount_set(&nb->refcnt, 1); write_lock_bh(&x25_neigh_list_lock); list_add(&nb->node, &x25_neigh_list); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 277c8d2448d6..b85b889596e5 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -55,7 +55,7 @@ static int x25_add_route(struct x25_address *address, unsigned int sigdigits, rt->sigdigits = sigdigits; rt->dev = dev; - atomic_set(&rt->refcnt, 1); + refcount_set(&rt->refcnt, 1); list_add(&rt->node, &x25_route_list); rc = 0; diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 6b5af65f491f..db0b1315d577 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -188,17 +188,14 @@ void x25_write_internal(struct sock *sk, int frametype) *dptr++ = X25_CALL_REQUEST; len = x25_addr_aton(addresses, &x25->dest_addr, &x25->source_addr); - dptr = skb_put(skb, len); - memcpy(dptr, addresses, len); + skb_put_data(skb, addresses, len); len = x25_create_facilities(facilities, &x25->facilities, &x25->dte_facilities, x25->neighbour->global_facil_mask); - dptr = skb_put(skb, len); - memcpy(dptr, facilities, len); - dptr = skb_put(skb, x25->calluserdata.cudlength); - memcpy(dptr, x25->calluserdata.cuddata, - x25->calluserdata.cudlength); + skb_put_data(skb, facilities, len); + skb_put_data(skb, x25->calluserdata.cuddata, + x25->calluserdata.cudlength); x25->calluserdata.cudlength = 0; break; @@ -210,17 +207,15 @@ void x25_write_internal(struct sock *sk, int frametype) &x25->facilities, &x25->dte_facilities, x25->vc_facil_mask); - dptr = skb_put(skb, len); - memcpy(dptr, facilities, len); + skb_put_data(skb, facilities, len); /* fast select with no restriction on response allows call user data. Userland must ensure it is ours and not theirs */ if(x25->facilities.reverse & 0x80) { - dptr = skb_put(skb, - x25->calluserdata.cudlength); - memcpy(dptr, x25->calluserdata.cuddata, - x25->calluserdata.cudlength); + skb_put_data(skb, + x25->calluserdata.cuddata, + x25->calluserdata.cudlength); } x25->calluserdata.cudlength = 0; break; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5aba03685d7d..5f7e8bfa0c2d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -140,7 +140,7 @@ ok: EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); #endif -int xfrm_dev_register(struct net_device *dev) +static int xfrm_dev_register(struct net_device *dev) { if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) return NOTIFY_BAD; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 9de4b1dbc0ae..923205e279f7 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -116,7 +116,7 @@ struct sec_path *secpath_dup(struct sec_path *src) for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } - atomic_set(&sp->refcnt, 1); + refcount_set(&sp->refcnt, 1); return sp; } EXPORT_SYMBOL(secpath_dup); @@ -126,7 +126,7 @@ int secpath_set(struct sk_buff *skb) struct sec_path *sp; /* Allocate new secpath or COW existing one. 
*/ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) { sp = secpath_dup(skb->sp); if (!sp) return -ENOMEM; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 643a18f72032..ff61d8557929 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -62,7 +62,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) { - return atomic_inc_not_zero(&policy->refcnt); + return refcount_inc_not_zero(&policy->refcnt); } static inline bool @@ -292,7 +292,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); - atomic_set(&policy->refcnt, 1); + refcount_set(&policy->refcnt, 1); skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); @@ -1586,7 +1586,9 @@ static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); struct dst_entry *dst = &xdst->u.dst; - dst_free(dst); + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + dst->obsolete = DST_OBSOLETE_DEAD; + dst_release_immediate(dst); } static const struct flow_cache_ops xfrm_bundle_fc_ops = { @@ -1616,7 +1618,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); + xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { struct dst_entry *dst = &xdst->u.dst; @@ -1719,10 +1721,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dst_prev) dst0 = dst1; - else { - dst_prev->child = dst_clone(dst1); - dst1->flags |= DST_NOHASH; - } + else + /* Ref count is taken during xfrm_alloc_dst() + * No need to do dst_clone() on dst1 + */ + dst_prev->child = dst1; xdst->route = dst; dst_copy_metrics(dst1, dst); @@ -1788,7 +1791,7 @@ put_states: xfrm_state_put(xfrm[i]); free_dst: if (dst0) - dst_free(dst0); + dst_release_immediate(dst0); dst0 = ERR_PTR(err); goto out; } @@ -2069,7 +2072,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, pol_dead |= pols[i]->walk.dead; } if (pol_dead) { - dst_free(&xdst->u.dst); + /* Mark DST_OBSOLETE_DEAD to fail the next + * xfrm_dst_check() + */ + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + dst_release_immediate(&xdst->u.dst); xdst = NULL; num_pols = 0; num_xfrms = 0; @@ -2116,11 +2123,12 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, if (xdst) { /* The policies were stolen for newly generated bundle */ xdst->num_pols = 0; - dst_free(&xdst->u.dst); + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + dst_release_immediate(&xdst->u.dst); } - /* Flow cache does not have reference, it dst_free()'s, - * but we do need to return one reference for original caller */ + /* We do need to return one reference for original caller */ dst_hold(&new_xdst->u.dst); return &new_xdst->flo; @@ -2143,9 +2151,11 @@ make_dummy_bundle: inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) - dst_free(&xdst->u.dst); - else + if (xdst != NULL) { + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + dst_release_immediate(&xdst->u.dst); + } else xfrm_pols_put(pols, num_pols); 
return ERR_PTR(err); } @@ -2217,7 +2227,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, } dst_hold(&xdst->u.dst); - xdst->u.dst.flags |= DST_NOCACHE; route = xdst->route; } } @@ -2632,10 +2641,12 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When a policy's bundle is pruned, we dst_free() the XFRM - * dst which causes it's ->obsolete field to be set to - * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like - * this, we want to force a new route lookup. + * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will + * be marked on it. + * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will + * be marked on it. + * Both will force stale_bundle() to fail on any xdst bundle with + * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) return dst; @@ -3264,11 +3275,6 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) && - xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family)) - return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) return -EINVAL; @@ -3292,7 +3298,8 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k, struct net *net) + struct xfrm_kmaddress *k, struct net *net, + struct xfrm_encap_tmpl *encap) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -3315,7 +3322,8 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, if ((x = xfrm_migrate_state_find(mp, net))) { x_cur[nx_cur] = x; nx_cur++; - if ((xc = xfrm_state_migrate(x, mp))) { + xc = xfrm_state_migrate(x, mp, encap); + if (xc) { x_new[nx_new] = xc; nx_new++; } else { @@ -3336,7 +3344,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate, k); + km_migrate(sel, dir, type, m, num_migrate, k, encap); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2e291bc5f1fc..6c0956d10db6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -48,7 +48,7 @@ static HLIST_HEAD(xfrm_state_gc_list); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { - return atomic_inc_not_zero(&x->refcnt); + return refcount_inc_not_zero(&x->refcnt); } static inline unsigned int xfrm_dst_hash(struct net *net, @@ -558,7 +558,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) if (x) { write_pnet(&x->xs_net, net); - atomic_set(&x->refcnt, 1); + refcount_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); @@ -1309,7 +1309,8 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE -static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, + struct xfrm_encap_tmpl *encap) { struct net *net = xs_net(orig); struct xfrm_state *x = xfrm_state_alloc(net); @@ -1351,8 +1352,14 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) } x->props.calgo = orig->props.calgo; - if (orig->encap) { - x->encap = kmemdup(orig->encap, sizeof(*x->encap),
GFP_KERNEL); + if (encap || orig->encap) { + if (encap) + x->encap = kmemdup(encap, sizeof(*x->encap), + GFP_KERNEL); + else + x->encap = kmemdup(orig->encap, sizeof(*x->encap), + GFP_KERNEL); + if (!x->encap) goto error; } @@ -1442,11 +1449,12 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n EXPORT_SYMBOL(xfrm_migrate_state_find); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m) + struct xfrm_migrate *m, + struct xfrm_encap_tmpl *encap) { struct xfrm_state *xc; - xc = xfrm_state_clone(x); + xc = xfrm_state_clone(x, encap); if (!xc) return NULL; @@ -1958,7 +1966,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k) + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap) { int err = -EINVAL; int ret; @@ -1967,7 +1976,8 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, rcu_read_lock(); list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate, k); + ret = km->migrate(sel, dir, type, m, num_migrate, k, + encap); if (!ret) err = ret; } @@ -2025,13 +2035,9 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (optlen <= 0 || optlen > PAGE_SIZE) return -EMSGSIZE; - data = kmalloc(optlen, GFP_KERNEL); - if (!data) - return -ENOMEM; - - err = -EFAULT; - if (copy_from_user(data, optval, optlen)) - goto out; + data = memdup_user(optval, optlen); + if (IS_ERR(data)) + return PTR_ERR(data); err = -EINVAL; rcu_read_lock(); @@ -2049,7 +2055,6 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = 0; } -out: kfree(data); return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 86116e9aaf3d..2be4c6af008a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2244,6 +2244,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, int err; int n = 0; struct net *net = sock_net(skb->sk); + struct xfrm_encap_tmpl *encap = NULL; if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2261,9 +2262,18 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net); + if (attrs[XFRMA_ENCAP]) { + encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*encap), GFP_KERNEL); + if (!encap) + return 0; + } - return 0; + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); + + kfree(encap); + + return err; } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2305,17 +2315,20 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma, + int with_encp) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + (with_encp ? 
nla_total_size(sizeof(struct xfrm_encap_tmpl)) : 0) + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, int num_migrate, const struct xfrm_kmaddress *k, - const struct xfrm_selector *sel, u8 dir, u8 type) + const struct xfrm_selector *sel, + const struct xfrm_encap_tmpl *encap, u8 dir, u8 type) { const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -2337,6 +2350,11 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, if (err) goto out_cancel; } + if (encap) { + err = nla_put(skb, XFRMA_ENCAP, sizeof(*encap), encap); + if (err) + goto out_cancel; + } err = copy_to_user_policy_type(type, skb); if (err) goto out_cancel; @@ -2356,17 +2374,19 @@ out_cancel: static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k) + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap) { struct net *net = &init_net; struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), + GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0) BUG(); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); @@ -2374,7 +2394,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k) + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } |
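With xfrm_do_migrate() now copying an optional XFRMA_ENCAP attribute and threading it through xfrm_migrate() into the cloned state, a key manager can update UDP encapsulation (for instance ESP-in-UDP ports after a NAT change) as part of a MIGRATE operation. A sketch of the template a userspace key manager might attach — netlink message assembly omitted, port values are placeholders:

    #include <string.h>
    #include <arpa/inet.h>
    #include <linux/xfrm.h>
    #include <linux/udp.h>              /* UDP_ENCAP_ESPINUDP */

    /* Filled the same way as for XFRM_MSG_NEWSA; attached to the
     * XFRM_MSG_MIGRATE request as an XFRMA_ENCAP attribute, which
     * xfrm_do_migrate() above kmemdup()s and passes down. */
    static void fill_migrate_encap(struct xfrm_encap_tmpl *encap)
    {
            memset(encap, 0, sizeof(*encap));
            encap->encap_type  = UDP_ENCAP_ESPINUDP;
            encap->encap_sport = htons(4500);   /* placeholder ports */
            encap->encap_dport = htons(4500);
    }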