diff options
Diffstat (limited to 'net')
339 files changed, 5813 insertions, 31939 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c40f7d5c4fca..7aa3af8b10ea 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -172,39 +172,42 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - struct vlan_priority_tci_mapping *mp = NULL; + struct vlan_priority_tci_mapping __rcu **mpp; + struct vlan_priority_tci_mapping *mp; struct vlan_priority_tci_mapping *np; + u32 bucket = skb_prio & 0xF; u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; /* See if a priority mapping exists.. */ - mp = vlan->egress_priority_map[skb_prio & 0xF]; + mpp = &vlan->egress_priority_map[bucket]; + mp = rtnl_dereference(*mpp); while (mp) { if (mp->priority == skb_prio) { - if (mp->vlan_qos && !vlan_qos) + if (!vlan_qos) { + rcu_assign_pointer(*mpp, rtnl_dereference(mp->next)); vlan->nr_egress_mappings--; - else if (!mp->vlan_qos && vlan_qos) - vlan->nr_egress_mappings++; - mp->vlan_qos = vlan_qos; + kfree_rcu(mp, rcu); + } else { + WRITE_ONCE(mp->vlan_qos, vlan_qos); + } return 0; } - mp = mp->next; + mpp = &mp->next; + mp = rtnl_dereference(*mpp); } /* Create a new mapping then. */ - mp = vlan->egress_priority_map[skb_prio & 0xF]; + if (!vlan_qos) + return 0; + np = kmalloc_obj(struct vlan_priority_tci_mapping); if (!np) return -ENOBUFS; - np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; - /* Before inserting this element in hash table, make sure all its fields - * are committed to memory. 
- * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() - */ - smp_wmb(); - vlan->egress_priority_map[skb_prio & 0xF] = np; + RCU_INIT_POINTER(np->next, rtnl_dereference(vlan->egress_priority_map[bucket])); + rcu_assign_pointer(vlan->egress_priority_map[bucket], np); if (vlan_qos) vlan->nr_egress_mappings++; return 0; @@ -604,11 +607,17 @@ void vlan_dev_free_egress_priority(const struct net_device *dev) int i; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { - while ((pm = vlan->egress_priority_map[i]) != NULL) { - vlan->egress_priority_map[i] = pm->next; - kfree(pm); + pm = rtnl_dereference(vlan->egress_priority_map[i]); + RCU_INIT_POINTER(vlan->egress_priority_map[i], NULL); + while (pm) { + struct vlan_priority_tci_mapping *next; + + next = rtnl_dereference(pm->next); + kfree_rcu(pm, rcu); + pm = next; } } + vlan->nr_egress_mappings = 0; } static void vlan_dev_uninit(struct net_device *dev) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a000b1ef0520..368d53ca7d87 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -260,13 +260,11 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { - for (pm = vlan->egress_priority_map[i]; pm; - pm = pm->next) { - if (!pm->vlan_qos) - continue; - + for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm; + pm = rcu_dereference_rtnl(pm->next)) { + u16 vlan_qos = READ_ONCE(pm->vlan_qos); m.from = pm->priority; - m.to = (pm->vlan_qos >> 13) & 0x7; + m.to = (vlan_qos >> 13) & 0x7; if (nla_put(skb, IFLA_VLAN_QOS_MAPPING, sizeof(m), &m)) goto nla_put_failure; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index fa67374bda49..0e424e0895b7 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -262,15 +262,19 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) vlan->ingress_priority_map[7]); seq_printf(seq, " EGRESS priority 
mappings: "); + rcu_read_lock(); for (i = 0; i < 16; i++) { - const struct vlan_priority_tci_mapping *mp - = vlan->egress_priority_map[i]; + const struct vlan_priority_tci_mapping *mp = + rcu_dereference(vlan->egress_priority_map[i]); while (mp) { + u16 vlan_qos = READ_ONCE(mp->vlan_qos); + seq_printf(seq, "%u:%d ", - mp->priority, ((mp->vlan_qos >> 13) & 0x7)); - mp = mp->next; + mp->priority, ((vlan_qos >> 13) & 0x7)); + mp = rcu_dereference(mp->next); } } + rcu_read_unlock(); seq_puts(seq, "\n"); return 0; diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 47af5a10e921..f9fb2db7a066 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -283,25 +283,33 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) cancel_work_sync(&ring->work); - if (!priv->rings[i].intf) + if (!ring->intf) break; - if (priv->rings[i].irq > 0) - unbind_from_irqhandler(priv->rings[i].irq, ring); - if (priv->rings[i].data.in) { - for (j = 0; - j < (1 << priv->rings[i].intf->ring_order); + if (ring->irq >= 0) { + unbind_from_irqhandler(ring->irq, ring); + ring->irq = -1; + } + if (ring->data.in) { + for (j = 0; j < (1 << ring->intf->ring_order); j++) { grant_ref_t ref; - ref = priv->rings[i].intf->ref[j]; + ref = ring->intf->ref[j]; gnttab_end_foreign_access(ref, NULL); + ring->intf->ref[j] = INVALID_GRANT_REF; } - free_pages_exact(priv->rings[i].data.in, - 1UL << (priv->rings[i].intf->ring_order + - XEN_PAGE_SHIFT)); + free_pages_exact(ring->data.in, + 1UL << (ring->intf->ring_order + + XEN_PAGE_SHIFT)); + ring->data.in = NULL; + ring->data.out = NULL; + } + if (ring->ref != INVALID_GRANT_REF) { + gnttab_end_foreign_access(ring->ref, NULL); + ring->ref = INVALID_GRANT_REF; } - gnttab_end_foreign_access(priv->rings[i].ref, NULL); - free_page((unsigned long)priv->rings[i].intf); + free_page((unsigned long)ring->intf); + ring->intf = NULL; } kfree(priv->rings); } @@ -334,6 +342,12 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, int ret = 
-ENOMEM; void *bytes = NULL; + ring->intf = NULL; + ring->data.in = NULL; + ring->data.out = NULL; + ring->ref = INVALID_GRANT_REF; + ring->irq = -1; + init_waitqueue_head(&ring->wq); spin_lock_init(&ring->lock); INIT_WORK(&ring->work, p9_xen_response); @@ -379,9 +393,18 @@ out: for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], NULL); free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT)); + ring->data.in = NULL; + ring->data.out = NULL; } - gnttab_end_foreign_access(ring->ref, NULL); - free_page((unsigned long)ring->intf); + if (ring->ref != INVALID_GRANT_REF) { + gnttab_end_foreign_access(ring->ref, NULL); + ring->ref = INVALID_GRANT_REF; + } + if (ring->intf) { + free_page((unsigned long)ring->intf); + ring->intf = NULL; + } + ring->irq = -1; return ret; } @@ -390,23 +413,29 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) int ret, i; struct xenbus_transaction xbt; struct xen_9pfs_front_priv *priv; - char *versions, *v; - unsigned int max_rings, max_ring_order, len = 0; + char *versions, *v, *token; + bool version_1 = false; + unsigned int max_rings, max_ring_order, len = 0, version; versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); if (IS_ERR(versions)) return PTR_ERR(versions); - for (v = versions; *v; v++) { - if (simple_strtoul(v, &v, 10) == 1) { - v = NULL; - break; + for (v = versions; (token = strsep(&v, ",")); ) { + if (!*token) + continue; + + ret = kstrtouint(token, 10, &version); + if (ret) { + kfree(versions); + return ret; } - } - if (v) { - kfree(versions); - return -EINVAL; + if (version == 1) + version_1 = true; } kfree(versions); + if (!version_1) + return -EINVAL; + max_rings = xenbus_read_unsigned(dev->otherend, "max-rings", 0); if (max_rings < XEN_9PFS_NUM_RINGS) return -EINVAL; diff --git a/net/Kconfig b/net/Kconfig index 62266eaf0e95..bdea8aef7983 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -414,7 +414,6 @@ endmenu # Network testing endmenu # Networking options -source "net/ax25/Kconfig" 
source "net/can/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" @@ -439,7 +438,6 @@ endif # WIRELESS source "net/rfkill/Kconfig" source "net/9p/Kconfig" -source "net/caif/Kconfig" source "net/ceph/Kconfig" source "net/nfc/Kconfig" source "net/psample/Kconfig" diff --git a/net/Makefile b/net/Makefile index 90e3d72bf58b..d2175fce0406 100644 --- a/net/Makefile +++ b/net/Makefile @@ -28,9 +28,6 @@ obj-y += dsa/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ -obj-$(CONFIG_NETROM) += netrom/ -obj-$(CONFIG_ROSE) += rose/ -obj-$(CONFIG_AX25) += ax25/ obj-$(CONFIG_CAN) += can/ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ @@ -53,7 +50,6 @@ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_SMC) += smc/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ -obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_DCB) += dcb/ obj-$(CONFIG_6LOWPAN) += 6lowpan/ obj-$(CONFIG_IEEE802154) += ieee802154/ diff --git a/net/atm/Kconfig b/net/atm/Kconfig index 77343d57ff2a..dfdc3a8553ba 100644 --- a/net/atm/Kconfig +++ b/net/atm/Kconfig @@ -19,43 +19,6 @@ config ATM of ATM. See the file <file:Documentation/networking/atm.rst> for further details. -config ATM_CLIP - tristate "Classical IP over ATM" - depends on ATM && INET - help - Classical IP over ATM for PVCs and SVCs, supporting InARP and - ATMARP. If you want to communication with other IP hosts on your ATM - network, you will typically either say Y here or to "LAN Emulation - (LANE)" below. - -config ATM_CLIP_NO_ICMP - bool "Do NOT send ICMP if no neighbour" - depends on ATM_CLIP - help - Normally, an "ICMP host unreachable" message is sent if a neighbour - cannot be reached because there is no VC to it in the kernel's - ATMARP table. This may cause problems when ATMARP table entries are - briefly removed during revalidation. If you say Y here, packets to - such neighbours are silently discarded instead. 
- -config ATM_LANE - tristate "LAN Emulation (LANE) support" - depends on ATM - help - LAN Emulation emulates services of existing LANs across an ATM - network. Besides operating as a normal ATM end station client, Linux - LANE client can also act as an proxy client bridging packets between - ELAN and Ethernet segments. You need LANE if you want to try MPOA. - -config ATM_MPOA - tristate "Multi-Protocol Over ATM (MPOA) support" - depends on ATM && INET && ATM_LANE!=n - help - Multi-Protocol Over ATM allows ATM edge devices such as routers, - bridges and ATM attached hosts establish direct ATM VCs across - subnetwork boundaries. These shortcut connections bypass routers - enhancing overall network performance. - config ATM_BR2684 tristate "RFC1483/2684 Bridged protocols" depends on ATM && INET diff --git a/net/atm/Makefile b/net/atm/Makefile index bfec0f2d83b5..484a1b1552cc 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -4,13 +4,9 @@ # atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o atm_sysfs.o -mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o obj-$(CONFIG_ATM) += atm.o -obj-$(CONFIG_ATM_CLIP) += clip.o obj-$(CONFIG_ATM_BR2684) += br2684.o atm-$(CONFIG_PROC_FS) += proc.o -obj-$(CONFIG_ATM_LANE) += lec.o -obj-$(CONFIG_ATM_MPOA) += mpoa.o obj-$(CONFIG_PPPOATM) += pppoatm.o diff --git a/net/atm/clip.c b/net/atm/clip.c deleted file mode 100644 index 516b2214680b..000000000000 --- a/net/atm/clip.c +++ /dev/null @@ -1,960 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/atm/clip.c - RFC1577 Classical IP over ATM */ - -/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/kernel.h> /* for UINT_MAX */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/wait.h> -#include <linux/timer.h> -#include <linux/if_arp.h> /* 
for some manifest constants */ -#include <linux/notifier.h> -#include <linux/atm.h> -#include <linux/atmdev.h> -#include <linux/atmclip.h> -#include <linux/atmarp.h> -#include <linux/capability.h> -#include <linux/ip.h> /* for net/route.h */ -#include <linux/in.h> /* for struct sockaddr_in */ -#include <linux/if.h> /* for IFF_UP */ -#include <linux/inetdevice.h> -#include <linux/bitops.h> -#include <linux/poison.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/rcupdate.h> -#include <linux/jhash.h> -#include <linux/slab.h> -#include <net/route.h> /* for struct rtable and routing */ -#include <net/icmp.h> /* icmp_send */ -#include <net/arp.h> -#include <linux/param.h> /* for HZ */ -#include <linux/uaccess.h> -#include <asm/byteorder.h> /* for htons etc. */ -#include <linux/atomic.h> - -#include "common.h" -#include "resources.h" -#include <net/atmclip.h> - -static struct net_device *clip_devs; -static struct atm_vcc __rcu *atmarpd; -static DEFINE_MUTEX(atmarpd_lock); -static struct timer_list idle_timer; -static const struct neigh_ops clip_neigh_ops; - -static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) -{ - struct sock *sk; - struct atmarp_ctrl *ctrl; - struct atm_vcc *vcc; - struct sk_buff *skb; - int err = 0; - - pr_debug("(%d)\n", type); - - rcu_read_lock(); - vcc = rcu_dereference(atmarpd); - if (!vcc) { - err = -EUNATCH; - goto unlock; - } - skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) { - err = -ENOMEM; - goto unlock; - } - ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); - ctrl->type = type; - ctrl->itf_num = itf; - ctrl->ip = ip; - atm_force_charge(vcc, skb->truesize); - - sk = sk_atm(vcc); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); -unlock: - rcu_read_unlock(); - return err; -} - -static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) -{ - pr_debug("%p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh); - clip_vcc->entry = entry; - 
clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */ - clip_vcc->next = entry->vccs; - entry->vccs = clip_vcc; - entry->neigh->used = jiffies; -} - -static void unlink_clip_vcc(struct clip_vcc *clip_vcc) -{ - struct atmarp_entry *entry = clip_vcc->entry; - struct clip_vcc **walk; - - if (!entry) { - pr_err("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc); - return; - } - netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */ - entry->neigh->used = jiffies; - for (walk = &entry->vccs; *walk; walk = &(*walk)->next) - if (*walk == clip_vcc) { - int error; - - *walk = clip_vcc->next; /* atomic */ - clip_vcc->entry = NULL; - if (clip_vcc->xoff) - netif_wake_queue(entry->neigh->dev); - if (entry->vccs) - goto out; - entry->expires = jiffies - 1; - /* force resolution or expiration */ - error = neigh_update(entry->neigh, NULL, NUD_NONE, - NEIGH_UPDATE_F_ADMIN, 0); - if (error) - pr_err("neigh_update failed with %d\n", error); - goto out; - } - pr_err("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc); -out: - netif_tx_unlock_bh(entry->neigh->dev); -} - -/* The neighbour entry n->lock is held. 
*/ -static int neigh_check_cb(struct neighbour *n) -{ - struct atmarp_entry *entry = neighbour_priv(n); - struct clip_vcc *cv; - - if (n->ops != &clip_neigh_ops) - return 0; - for (cv = entry->vccs; cv; cv = cv->next) { - unsigned long exp = cv->last_use + cv->idle_timeout; - - if (cv->idle_timeout && time_after(jiffies, exp)) { - pr_debug("releasing vcc %p->%p of entry %p\n", - cv, cv->vcc, entry); - vcc_release_async(cv->vcc, -ETIMEDOUT); - } - } - - if (entry->vccs || time_before(jiffies, entry->expires)) - return 0; - - if (refcount_read(&n->refcnt) > 1) { - struct sk_buff *skb; - - pr_debug("destruction postponed with ref %d\n", - refcount_read(&n->refcnt)); - - while ((skb = skb_dequeue(&n->arp_queue)) != NULL) - dev_kfree_skb(skb); - - return 0; - } - - pr_debug("expired neigh %p\n", n); - return 1; -} - -static void idle_timer_check(struct timer_list *unused) -{ - spin_lock(&arp_tbl.lock); - __neigh_for_each_release(&arp_tbl, neigh_check_cb); - mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - spin_unlock(&arp_tbl.lock); -} - -static int clip_arp_rcv(struct sk_buff *skb) -{ - struct atm_vcc *vcc; - - pr_debug("\n"); - vcc = ATM_SKB(skb)->vcc; - if (!vcc || !atm_charge(vcc, skb->truesize)) { - dev_kfree_skb_any(skb); - return 0; - } - pr_debug("pushing to %p\n", vcc); - pr_debug("using %p\n", CLIP_VCC(vcc)->old_push); - CLIP_VCC(vcc)->old_push(vcc, skb); - return 0; -} - -static const unsigned char llc_oui[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, - 0x00 -}; - -static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - - pr_debug("\n"); - - if (!skb) { - pr_debug("removing VCC %p\n", clip_vcc); - if (clip_vcc->entry) - unlink_clip_vcc(clip_vcc); - clip_vcc->old_push(vcc, NULL); /* pass on the bad news */ - kfree(clip_vcc); - return; - } - atm_return(vcc, skb->truesize); - if 
(!clip_devs) { - kfree_skb(skb); - return; - } - - skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs; - /* clip_vcc->entry == NULL if we don't have an IP address yet */ - if (!skb->dev) { - dev_kfree_skb_any(skb); - return; - } - ATM_SKB(skb)->vcc = vcc; - skb_reset_mac_header(skb); - if (!clip_vcc->encap || - skb->len < RFC1483LLC_LEN || - memcmp(skb->data, llc_oui, sizeof(llc_oui))) - skb->protocol = htons(ETH_P_IP); - else { - skb->protocol = ((__be16 *)skb->data)[3]; - skb_pull(skb, RFC1483LLC_LEN); - if (skb->protocol == htons(ETH_P_ARP)) { - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - clip_arp_rcv(skb); - return; - } - } - clip_vcc->last_use = jiffies; - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); - netif_rx(skb); -} - -/* - * Note: these spinlocks _must_not_ block on non-SMP. The only goal is that - * clip_pop is atomic with respect to the critical section in clip_start_xmit. 
- */ - -static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - struct net_device *dev = skb->dev; - int old; - unsigned long flags; - - pr_debug("(vcc %p)\n", vcc); - clip_vcc->old_pop(vcc, skb); - /* skb->dev == NULL in outbound ARP packets */ - if (!dev) - return; - spin_lock_irqsave(&PRIV(dev)->xoff_lock, flags); - if (atm_may_send(vcc, 0)) { - old = xchg(&clip_vcc->xoff, 0); - if (old) - netif_wake_queue(dev); - } - spin_unlock_irqrestore(&PRIV(dev)->xoff_lock, flags); -} - -static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) -{ - __be32 *ip = (__be32 *) neigh->primary_key; - - pr_debug("(neigh %p, skb %p)\n", neigh, skb); - to_atmarpd(act_need, PRIV(neigh->dev)->number, *ip); -} - -static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb) -{ -#ifndef CONFIG_ATM_CLIP_NO_ICMP - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); -#endif - kfree_skb(skb); -} - -static const struct neigh_ops clip_neigh_ops = { - .family = AF_INET, - .solicit = clip_neigh_solicit, - .error_report = clip_neigh_error, - .output = neigh_direct_output, - .connected_output = neigh_direct_output, -}; - -static int clip_constructor(struct net_device *dev, struct neighbour *neigh) -{ - struct atmarp_entry *entry = neighbour_priv(neigh); - - if (neigh->tbl->family != AF_INET) - return -EINVAL; - - if (neigh->type != RTN_UNICAST) - return -EINVAL; - - neigh->nud_state = NUD_NONE; - neigh->ops = &clip_neigh_ops; - neigh->output = neigh->ops->output; - entry->neigh = neigh; - entry->vccs = NULL; - entry->expires = jiffies - 1; - - return 0; -} - -/* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */ - -/* - * We play with the resolve flag: 0 and 1 have the usual meaning, but -1 means - * to allocate the neighbour entry but not to ask atmarpd for resolution. Also, - * don't increment the usage count. This is used to create entries in - * clip_setentry. 
- */ - -static int clip_encap(struct atm_vcc *vcc, int mode) -{ - if (!CLIP_VCC(vcc)) - return -EBADFD; - - CLIP_VCC(vcc)->encap = mode; - return 0; -} - -static netdev_tx_t clip_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct clip_priv *clip_priv = PRIV(dev); - struct dst_entry *dst = skb_dst(skb); - struct atmarp_entry *entry; - struct neighbour *n; - struct atm_vcc *vcc; - struct rtable *rt; - __be32 *daddr; - int old; - unsigned long flags; - - pr_debug("(skb %p)\n", skb); - if (!dst) { - pr_err("skb_dst(skb) == NULL\n"); - dev_kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - rt = dst_rtable(dst); - if (rt->rt_gw_family == AF_INET) - daddr = &rt->rt_gw4; - else - daddr = &ip_hdr(skb)->daddr; - n = dst_neigh_lookup(dst, daddr); - if (!n) { - pr_err("NO NEIGHBOUR !\n"); - dev_kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - entry = neighbour_priv(n); - if (!entry->vccs) { - if (time_after(jiffies, entry->expires)) { - /* should be resolved */ - entry->expires = jiffies + ATMARP_RETRY_DELAY * HZ; - to_atmarpd(act_need, PRIV(dev)->number, *((__be32 *)n->primary_key)); - } - if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS) - skb_queue_tail(&entry->neigh->arp_queue, skb); - else { - dev_kfree_skb(skb); - dev->stats.tx_dropped++; - } - goto out_release_neigh; - } - pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); - ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", n, vcc); - if (entry->vccs->encap) { - void *here; - - here = skb_push(skb, RFC1483LLC_LEN); - memcpy(here, llc_oui, sizeof(llc_oui)); - ((__be16 *) here)[3] = skb->protocol; - } - atm_account_tx(vcc, skb); - entry->vccs->last_use = jiffies; - pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); - old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... 
*/ - if (old) { - pr_warn("XOFF->XOFF transition\n"); - goto out_release_neigh; - } - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - vcc->send(vcc, skb); - if (atm_may_send(vcc, 0)) { - entry->vccs->xoff = 0; - goto out_release_neigh; - } - spin_lock_irqsave(&clip_priv->xoff_lock, flags); - netif_stop_queue(dev); /* XOFF -> throttle immediately */ - barrier(); - if (!entry->vccs->xoff) - netif_start_queue(dev); - /* Oh, we just raced with clip_pop. netif_start_queue should be - good enough, because nothing should really be asleep because - of the brief netif_stop_queue. If this isn't true or if it - changes, use netif_wake_queue instead. */ - spin_unlock_irqrestore(&clip_priv->xoff_lock, flags); -out_release_neigh: - neigh_release(n); - return NETDEV_TX_OK; -} - -static int clip_mkip(struct atm_vcc *vcc, int timeout) -{ - struct clip_vcc *clip_vcc; - - if (!vcc->push) - return -EBADFD; - if (vcc->user_back) - return -EINVAL; - clip_vcc = kmalloc_obj(struct clip_vcc); - if (!clip_vcc) - return -ENOMEM; - pr_debug("%p vcc %p\n", clip_vcc, vcc); - clip_vcc->vcc = vcc; - vcc->user_back = clip_vcc; - set_bit(ATM_VF_IS_CLIP, &vcc->flags); - clip_vcc->entry = NULL; - clip_vcc->xoff = 0; - clip_vcc->encap = 1; - clip_vcc->last_use = jiffies; - clip_vcc->idle_timeout = timeout * HZ; - clip_vcc->old_push = vcc->push; - clip_vcc->old_pop = vcc->pop; - vcc->push = clip_push; - vcc->pop = clip_pop; - - /* re-process everything received between connection setup and MKIP */ - vcc_process_recv_queue(vcc); - - return 0; -} - -static int clip_setentry(struct atm_vcc *vcc, __be32 ip) -{ - struct neighbour *neigh; - struct atmarp_entry *entry; - int error; - struct clip_vcc *clip_vcc; - struct rtable *rt; - - if (vcc->push != clip_push) { - pr_warn("non-CLIP VCC\n"); - return -EBADF; - } - clip_vcc = CLIP_VCC(vcc); - if (!ip) { - if (!clip_vcc->entry) { - pr_err("hiding hidden ATMARP entry\n"); - return 0; - } - pr_debug("remove\n"); - unlink_clip_vcc(clip_vcc); - 
return 0; - } - rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK); - if (IS_ERR(rt)) - return PTR_ERR(rt); - neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1); - ip_rt_put(rt); - if (!neigh) - return -ENOMEM; - entry = neighbour_priv(neigh); - if (entry != clip_vcc->entry) { - if (!clip_vcc->entry) - pr_debug("add\n"); - else { - pr_debug("update\n"); - unlink_clip_vcc(clip_vcc); - } - link_vcc(clip_vcc, entry); - } - error = neigh_update(neigh, llc_oui, NUD_PERMANENT, - NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0); - neigh_release(neigh); - return error; -} - -static const struct net_device_ops clip_netdev_ops = { - .ndo_start_xmit = clip_start_xmit, - .ndo_neigh_construct = clip_constructor, -}; - -static void clip_setup(struct net_device *dev) -{ - dev->netdev_ops = &clip_netdev_ops; - dev->type = ARPHRD_ATM; - dev->neigh_priv_len = sizeof(struct atmarp_entry); - dev->hard_header_len = RFC1483LLC_LEN; - dev->mtu = RFC1626_MTU; - dev->tx_queue_len = 100; /* "normal" queue (packets) */ - /* When using a "real" qdisc, the qdisc determines the queue */ - /* length. tx_queue_len is only used for the default case, */ - /* without any more elaborate queuing. 100 is a reasonable */ - /* compromise between decent burst-tolerance and protection */ - /* against memory hogs. 
*/ - netif_keep_dst(dev); -} - -static int clip_create(int number) -{ - struct net_device *dev; - struct clip_priv *clip_priv; - int error; - - if (number != -1) { - for (dev = clip_devs; dev; dev = PRIV(dev)->next) - if (PRIV(dev)->number == number) - return -EEXIST; - } else { - number = 0; - for (dev = clip_devs; dev; dev = PRIV(dev)->next) - if (PRIV(dev)->number >= number) - number = PRIV(dev)->number + 1; - } - dev = alloc_netdev(sizeof(struct clip_priv), "", NET_NAME_UNKNOWN, - clip_setup); - if (!dev) - return -ENOMEM; - clip_priv = PRIV(dev); - sprintf(dev->name, "atm%d", number); - spin_lock_init(&clip_priv->xoff_lock); - clip_priv->number = number; - error = register_netdev(dev); - if (error) { - free_netdev(dev); - return error; - } - clip_priv->next = clip_devs; - clip_devs = dev; - pr_debug("registered (net:%s)\n", dev->name); - return number; -} - -static int clip_device_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - if (event == NETDEV_UNREGISTER) - return NOTIFY_DONE; - - /* ignore non-CLIP devices */ - if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - pr_debug("NETDEV_UP\n"); - to_atmarpd(act_up, PRIV(dev)->number, 0); - break; - case NETDEV_GOING_DOWN: - pr_debug("NETDEV_DOWN\n"); - to_atmarpd(act_down, PRIV(dev)->number, 0); - break; - case NETDEV_CHANGE: - case NETDEV_CHANGEMTU: - pr_debug("NETDEV_CHANGE*\n"); - to_atmarpd(act_change, PRIV(dev)->number, 0); - break; - } - return NOTIFY_DONE; -} - -static int clip_inet_event(struct notifier_block *this, unsigned long event, - void *ifa) -{ - struct in_device *in_dev; - struct netdev_notifier_info info; - - in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; - /* - * Transitions are of the down-change-up type, so it's sufficient to - * handle the change on up. 
- */ - if (event != NETDEV_UP) - return NOTIFY_DONE; - netdev_notifier_info_init(&info, in_dev->dev); - return clip_device_event(this, NETDEV_CHANGE, &info); -} - -static struct notifier_block clip_dev_notifier = { - .notifier_call = clip_device_event, -}; - - - -static struct notifier_block clip_inet_notifier = { - .notifier_call = clip_inet_event, -}; - - - -static void atmarpd_close(struct atm_vcc *vcc) -{ - pr_debug("\n"); - - mutex_lock(&atmarpd_lock); - RCU_INIT_POINTER(atmarpd, NULL); - mutex_unlock(&atmarpd_lock); - - synchronize_rcu(); - skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - - pr_debug("(done)\n"); - module_put(THIS_MODULE); -} - -static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) -{ - atm_return_tx(vcc, skb); - dev_kfree_skb_any(skb); - return 0; -} - -static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close, - .send = atmarpd_send -}; - - -static struct atm_dev atmarpd_dev = { - .ops = &atmarpd_dev_ops, - .type = "arpd", - .number = 999, - .lock = __SPIN_LOCK_UNLOCKED(atmarpd_dev.lock) -}; - - -static int atm_init_atmarp(struct atm_vcc *vcc) -{ - if (vcc->push == clip_push) - return -EINVAL; - - mutex_lock(&atmarpd_lock); - if (atmarpd) { - mutex_unlock(&atmarpd_lock); - return -EADDRINUSE; - } - - mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - - rcu_assign_pointer(atmarpd, vcc); - set_bit(ATM_VF_META, &vcc->flags); - set_bit(ATM_VF_READY, &vcc->flags); - /* allow replies and avoid getting closed if signaling dies */ - vcc->dev = &atmarpd_dev; - vcc_insert_socket(sk_atm(vcc)); - vcc->push = NULL; - vcc->pop = NULL; /* crash */ - vcc->push_oam = NULL; /* crash */ - mutex_unlock(&atmarpd_lock); - return 0; -} - -static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct atm_vcc *vcc = ATM_SD(sock); - struct sock *sk = sock->sk; - int err = 0; - - switch (cmd) { - case SIOCMKCLIP: - case ATMARPD_CTRL: - case ATMARP_MKIP: - case ATMARP_SETENTRY: - case 
ATMARP_ENCAP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - default: - return -ENOIOCTLCMD; - } - - switch (cmd) { - case SIOCMKCLIP: - err = clip_create(arg); - break; - case ATMARPD_CTRL: - lock_sock(sk); - err = atm_init_atmarp(vcc); - if (!err) { - sock->state = SS_CONNECTED; - __module_get(THIS_MODULE); - } - release_sock(sk); - break; - case ATMARP_MKIP: - lock_sock(sk); - err = clip_mkip(vcc, arg); - release_sock(sk); - break; - case ATMARP_SETENTRY: - err = clip_setentry(vcc, (__force __be32)arg); - break; - case ATMARP_ENCAP: - err = clip_encap(vcc, arg); - break; - } - return err; -} - -static struct atm_ioctl clip_ioctl_ops = { - .owner = THIS_MODULE, - .ioctl = clip_ioctl, -}; - -#ifdef CONFIG_PROC_FS - -static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr) -{ - static int code[] = { 1, 2, 10, 6, 1, 0 }; - static int e164[] = { 1, 8, 4, 6, 1, 0 }; - - if (*addr->sas_addr.pub) { - seq_printf(seq, "%s", addr->sas_addr.pub); - if (*addr->sas_addr.prv) - seq_putc(seq, '+'); - } else if (!*addr->sas_addr.prv) { - seq_printf(seq, "%s", "(none)"); - return; - } - if (*addr->sas_addr.prv) { - unsigned char *prv = addr->sas_addr.prv; - int *fields; - int i, j; - - fields = *prv == ATM_AFI_E164 ? e164 : code; - for (i = 0; fields[i]; i++) { - for (j = fields[i]; j; j--) - seq_printf(seq, "%02X", *prv++); - if (fields[i + 1]) - seq_putc(seq, '.'); - } - } -} - -/* This means the neighbour entry has no attached VCC objects. 
*/ -#define SEQ_NO_VCC_TOKEN ((void *) 2) - -static void atmarp_info(struct seq_file *seq, struct neighbour *n, - struct atmarp_entry *entry, struct clip_vcc *clip_vcc) -{ - struct net_device *dev = n->dev; - unsigned long exp; - char buf[17]; - int svc, llc, off; - - svc = ((clip_vcc == SEQ_NO_VCC_TOKEN) || - (sk_atm(clip_vcc->vcc)->sk_family == AF_ATMSVC)); - - llc = ((clip_vcc == SEQ_NO_VCC_TOKEN) || clip_vcc->encap); - - if (clip_vcc == SEQ_NO_VCC_TOKEN) - exp = entry->neigh->used; - else - exp = clip_vcc->last_use; - - exp = (jiffies - exp) / HZ; - - seq_printf(seq, "%-6s%-4s%-4s%5ld ", - dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp); - - off = scnprintf(buf, sizeof(buf) - 1, "%pI4", n->primary_key); - while (off < 16) - buf[off++] = ' '; - buf[off] = '\0'; - seq_printf(seq, "%s", buf); - - if (clip_vcc == SEQ_NO_VCC_TOKEN) { - if (time_before(jiffies, entry->expires)) - seq_printf(seq, "(resolving)\n"); - else - seq_printf(seq, "(expired, ref %d)\n", - refcount_read(&entry->neigh->refcnt)); - } else if (!svc) { - seq_printf(seq, "%d.%d.%d\n", - clip_vcc->vcc->dev->number, - clip_vcc->vcc->vpi, clip_vcc->vcc->vci); - } else { - svc_addr(seq, &clip_vcc->vcc->remote); - seq_putc(seq, '\n'); - } -} - -struct clip_seq_state { - /* This member must be first. */ - struct neigh_seq_state ns; - - /* Local to clip specific iteration. 
*/ - struct clip_vcc *vcc; -}; - -static struct clip_vcc *clip_seq_next_vcc(struct atmarp_entry *e, - struct clip_vcc *curr) -{ - if (!curr) { - curr = e->vccs; - if (!curr) - return SEQ_NO_VCC_TOKEN; - return curr; - } - if (curr == SEQ_NO_VCC_TOKEN) - return NULL; - - curr = curr->next; - - return curr; -} - -static void *clip_seq_vcc_walk(struct clip_seq_state *state, - struct atmarp_entry *e, loff_t * pos) -{ - struct clip_vcc *vcc = state->vcc; - - vcc = clip_seq_next_vcc(e, vcc); - if (vcc && pos != NULL) { - while (*pos) { - vcc = clip_seq_next_vcc(e, vcc); - if (!vcc) - break; - --(*pos); - } - } - state->vcc = vcc; - - return vcc; -} - -static void *clip_seq_sub_iter(struct neigh_seq_state *_state, - struct neighbour *n, loff_t * pos) -{ - struct clip_seq_state *state = (struct clip_seq_state *)_state; - - if (n->dev->type != ARPHRD_ATM) - return NULL; - - return clip_seq_vcc_walk(state, neighbour_priv(n), pos); -} - -static void *clip_seq_start(struct seq_file *seq, loff_t * pos) -{ - struct clip_seq_state *state = seq->private; - state->ns.neigh_sub_iter = clip_seq_sub_iter; - return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_NEIGH_ONLY); -} - -static int clip_seq_show(struct seq_file *seq, void *v) -{ - static char atm_arp_banner[] = - "IPitf TypeEncp Idle IP address ATM address\n"; - - if (v == SEQ_START_TOKEN) { - seq_puts(seq, atm_arp_banner); - } else { - struct clip_seq_state *state = seq->private; - struct clip_vcc *vcc = state->vcc; - struct neighbour *n = v; - - atmarp_info(seq, n, neighbour_priv(n), vcc); - } - return 0; -} - -static const struct seq_operations arp_seq_ops = { - .start = clip_seq_start, - .next = neigh_seq_next, - .stop = neigh_seq_stop, - .show = clip_seq_show, -}; -#endif - -static void atm_clip_exit_noproc(void); - -static int __init atm_clip_init(void) -{ - register_atm_ioctl(&clip_ioctl_ops); - register_netdevice_notifier(&clip_dev_notifier); - register_inetaddr_notifier(&clip_inet_notifier); - - 
timer_setup(&idle_timer, idle_timer_check, 0); - -#ifdef CONFIG_PROC_FS - { - struct proc_dir_entry *p; - - p = proc_create_net("arp", 0444, atm_proc_root, &arp_seq_ops, - sizeof(struct clip_seq_state)); - if (!p) { - pr_err("Unable to initialize /proc/net/atm/arp\n"); - atm_clip_exit_noproc(); - return -ENOMEM; - } - } -#endif - - return 0; -} - -static void atm_clip_exit_noproc(void) -{ - struct net_device *dev, *next; - - unregister_inetaddr_notifier(&clip_inet_notifier); - unregister_netdevice_notifier(&clip_dev_notifier); - - deregister_atm_ioctl(&clip_ioctl_ops); - - /* First, stop the idle timer, so it stops banging - * on the table. - */ - timer_delete_sync(&idle_timer); - - dev = clip_devs; - while (dev) { - next = PRIV(dev)->next; - unregister_netdev(dev); - free_netdev(dev); - dev = next; - } -} - -static void __exit atm_clip_exit(void) -{ - remove_proc_entry("arp", atm_proc_root); - - atm_clip_exit_noproc(); -} - -module_init(atm_clip_init); -module_exit(atm_clip_exit); -MODULE_AUTHOR("Werner Almesberger"); -MODULE_DESCRIPTION("Classical/IP over ATM interface"); -MODULE_LICENSE("GPL"); diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 0f7a39aeccc8..0f3f9ad8301f 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -11,14 +11,10 @@ #include <linux/net.h> /* struct socket, struct proto_ops */ #include <linux/atm.h> /* ATM stuff */ #include <linux/atmdev.h> -#include <linux/atmclip.h> /* CLIP_*ENCAP */ #include <linux/atmarp.h> /* manifest constants */ #include <linux/capability.h> #include <linux/sonet.h> /* for ioctls */ #include <linux/atmsvc.h> -#include <linux/atmmpc.h> -#include <net/atmclip.h> -#include <linux/atmlec.h> #include <linux/mutex.h> #include <asm/ioctls.h> #include <net/compat.h> @@ -138,16 +134,6 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, } break; } - case ATMMPC_CTRL: - case ATMMPC_DATA: - request_module("mpoa"); - break; - case ATMARPD_CTRL: - request_module("clip"); - break; - case ATMLEC_CTRL: - 
request_module("lec"); - break; } error = -ENOIOCTLCMD; diff --git a/net/atm/lec.c b/net/atm/lec.c deleted file mode 100644 index 10e260acf602..000000000000 --- a/net/atm/lec.c +++ /dev/null @@ -1,2274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lec.c: Lan Emulation driver - * - * Marko Kiiskila <mkiiskila@yahoo.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include <linux/slab.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/capability.h> - -/* We are ethernet device */ -#include <linux/if_ether.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <net/sock.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <asm/byteorder.h> -#include <linux/uaccess.h> -#include <net/arp.h> -#include <net/dst.h> -#include <linux/proc_fs.h> -#include <linux/spinlock.h> -#include <linux/seq_file.h> - -/* And atm device */ -#include <linux/atmdev.h> -#include <linux/atmlec.h> - -/* Proxy LEC knows about bridging */ -#if IS_ENABLED(CONFIG_BRIDGE) -#include "../bridge/br_private.h" - -static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; -#endif - -/* Modular too */ -#include <linux/module.h> -#include <linux/init.h> - -/* Hardening for Spectre-v1 */ -#include <linux/nospec.h> - -#include "lec.h" -#include "lec_arpc.h" -#include "resources.h" - -#define DUMP_PACKETS 0 /* - * 0 = None, - * 1 = 30 first bytes - * 2 = Whole packet - */ - -#define LEC_UNRES_QUE_LEN 8 /* - * number of tx packets to queue for a - * single destination while waiting for SVC - */ - -static int lec_open(struct net_device *dev); -static netdev_tx_t lec_start_xmit(struct sk_buff *skb, - struct net_device *dev); -static int lec_close(struct net_device *dev); -static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - const unsigned char *mac_addr); -static int lec_arp_remove(struct lec_priv *priv, - struct lec_arp_table *to_remove); -/* LANE2 functions */ -static void 
lane2_associate_ind(struct net_device *dev, const u8 *mac_address, - const u8 *tlvs, u32 sizeoftlvs); -static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); -static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, - const u8 *tlvs, u32 sizeoftlvs); - -static int lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, - unsigned long permanent); -static void lec_arp_check_empties(struct lec_priv *priv, - struct atm_vcc *vcc, struct sk_buff *skb); -static void lec_arp_destroy(struct lec_priv *priv); -static void lec_arp_init(struct lec_priv *priv); -static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - const unsigned char *mac_to_find, - int is_rdesc, - struct lec_arp_table **ret_entry); -static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, - const unsigned char *atm_addr, - unsigned long remoteflag, - unsigned int targetless_le_arp); -static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id); -static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc); -static void lec_set_flush_tran_id(struct lec_priv *priv, - const unsigned char *atm_addr, - unsigned long tran_id); -static void lec_vcc_added(struct lec_priv *priv, - const struct atmlec_ioc *ioc_data, - struct atm_vcc *vcc, - void (*old_push)(struct atm_vcc *vcc, - struct sk_buff *skb)); -static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc); - -/* must be done under lec_arp_lock */ -static inline void lec_arp_hold(struct lec_arp_table *entry) -{ - refcount_inc(&entry->usage); -} - -static inline void lec_arp_put(struct lec_arp_table *entry) -{ - if (refcount_dec_and_test(&entry->usage)) - kfree(entry); -} - -static struct lane2_ops lane2_ops = { - .resolve = lane2_resolve, /* spec 3.1.3 */ - .associate_req = lane2_associate_req, /* spec 3.1.4 */ - .associate_indicator = NULL /* spec 3.1.5 */ -}; - -static unsigned char bus_mac[ETH_ALEN] = { 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -/* Device structures */ -static struct net_device *dev_lec[MAX_LEC_ITF]; -static DEFINE_MUTEX(lec_mutex); - -#if IS_ENABLED(CONFIG_BRIDGE) -static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) -{ - char *buff; - struct lec_priv *priv; - - /* - * Check if this is a BPDU. If so, ask zeppelin to send - * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit - * as the Config BPDU has - */ - buff = skb->data + skb->dev->hard_header_len; - if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { - struct sock *sk; - struct sk_buff *skb2; - struct atmlec_msg *mesg; - - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) - return; - skb2->len = sizeof(struct atmlec_msg); - mesg = (struct atmlec_msg *)skb2->data; - mesg->type = l_topology_change; - buff += 4; - mesg->content.normal.flag = *buff & 0x01; - /* 0x01 is topology change */ - - priv = netdev_priv(dev); - struct atm_vcc *vcc; - - rcu_read_lock(); - vcc = rcu_dereference(priv->lecd); - if (vcc) { - atm_force_charge(vcc, skb2->truesize); - sk = sk_atm(vcc); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); - } else { - dev_kfree_skb(skb2); - } - rcu_read_unlock(); - } -} -#endif /* IS_ENABLED(CONFIG_BRIDGE) */ - -/* - * Open/initialize the netdevice. This is called (in the current kernel) - * sometime after booting when the 'ifconfig' program is run. - * - * This routine should set everything up anew at each open, even - * registers that "should" only need to be set once at boot, so that - * there is non-reboot way to recover if something goes wrong. 
- */ - -static int lec_open(struct net_device *dev) -{ - netif_start_queue(dev); - - return 0; -} - -static void -lec_send(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int len = skb->len; - - ATM_SKB(skb)->vcc = vcc; - atm_account_tx(vcc, skb); - - if (vcc->send(vcc, skb) < 0) { - dev->stats.tx_dropped++; - return; - } - - dev->stats.tx_packets++; - dev->stats.tx_bytes += len; -} - -static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue) -{ - pr_info("%s\n", dev->name); - netif_trans_update(dev); - netif_wake_queue(dev); -} - -static netdev_tx_t lec_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct sk_buff *skb2; - struct lec_priv *priv = netdev_priv(dev); - struct lecdatahdr_8023 *lec_h; - struct atm_vcc *vcc; - struct lec_arp_table *entry; - unsigned char *dst; - int min_frame_size; - int is_rdesc; - - pr_debug("called\n"); - if (!rcu_access_pointer(priv->lecd)) { - pr_info("%s:No lecd attached\n", dev->name); - dev->stats.tx_errors++; - netif_stop_queue(dev); - kfree_skb(skb); - return NETDEV_TX_OK; - } - - pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", - (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), - (long)skb_end_pointer(skb)); -#if IS_ENABLED(CONFIG_BRIDGE) - if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) - lec_handle_bridge(skb, dev); -#endif - - /* Make sure we have room for lec_id */ - if (skb_headroom(skb) < 2) { - pr_debug("reallocating skb\n"); - skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); - if (unlikely(!skb2)) { - kfree_skb(skb); - return NETDEV_TX_OK; - } - consume_skb(skb); - skb = skb2; - } - skb_push(skb, 2); - - /* Put le header to place */ - lec_h = (struct lecdatahdr_8023 *)skb->data; - lec_h->le_header = htons(priv->lecid); - -#if DUMP_PACKETS >= 2 -#define MAX_DUMP_SKB 99 -#elif DUMP_PACKETS >= 1 -#define MAX_DUMP_SKB 30 -#endif -#if DUMP_PACKETS >= 1 - printk(KERN_DEBUG "%s: send datalen:%ld 
lecid:%4.4x\n", - dev->name, skb->len, priv->lecid); - print_hex_dump(KERN_DEBUG, "", DUMP_OFFSET, 16, 1, - skb->data, min(skb->len, MAX_DUMP_SKB), true); -#endif /* DUMP_PACKETS >= 1 */ - - /* Minimum ethernet-frame size */ - min_frame_size = LEC_MINIMUM_8023_SIZE; - if (skb->len < min_frame_size) { - if ((skb->len + skb_tailroom(skb)) < min_frame_size) { - skb2 = skb_copy_expand(skb, 0, - min_frame_size - skb->truesize, - GFP_ATOMIC); - dev_kfree_skb(skb); - if (skb2 == NULL) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - skb = skb2; - } - skb_put(skb, min_frame_size - skb->len); - } - - /* Send to right vcc */ - is_rdesc = 0; - dst = lec_h->h_dest; - entry = NULL; - vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - pr_debug("%s:vcc:%p vcc_flags:%lx, entry:%p\n", - dev->name, vcc, vcc ? vcc->flags : 0, entry); - if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { - if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - pr_debug("%s:queuing packet, MAC address %pM\n", - dev->name, lec_h->h_dest); - skb_queue_tail(&entry->tx_wait, skb); - } else { - pr_debug("%s:tx queue full or no arp entry, dropping, MAC address: %pM\n", - dev->name, lec_h->h_dest); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - } - goto out; - } -#if DUMP_PACKETS > 0 - printk(KERN_DEBUG "%s:sending to vpi:%d vci:%d\n", - dev->name, vcc->vpi, vcc->vci); -#endif /* DUMP_PACKETS > 0 */ - - while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - pr_debug("emptying tx queue, MAC address %pM\n", lec_h->h_dest); - lec_send(vcc, skb2); - } - - lec_send(vcc, skb); - - if (!atm_may_send(vcc, 0)) { - struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - - vpriv->xoff = 1; - netif_stop_queue(dev); - - /* - * vcc->pop() might have occurred in between, making - * the vcc usuable again. Since xmit is serialized, - * this is the only situation we have to re-test. 
- */ - - if (atm_may_send(vcc, 0)) - netif_wake_queue(dev); - } - -out: - if (entry) - lec_arp_put(entry); - netif_trans_update(dev); - return NETDEV_TX_OK; -} - -/* The inverse routine to net_open(). */ -static int lec_close(struct net_device *dev) -{ - netif_stop_queue(dev); - return 0; -} - -static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) -{ - static const u8 zero_addr[ETH_ALEN] = {}; - unsigned long flags; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = netdev_priv(dev); - struct atmlec_msg *mesg; - struct lec_arp_table *entry; - char *tmp; /* FIXME */ - - WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); - mesg = (struct atmlec_msg *)skb->data; - tmp = skb->data; - tmp += sizeof(struct atmlec_msg); - pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); - switch (mesg->type) { - case l_set_mac_addr: - eth_hw_addr_set(dev, mesg->content.normal.mac_addr); - break; - case l_del_mac_addr: - eth_hw_addr_set(dev, zero_addr); - break; - case l_addr_delete: - lec_addr_delete(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_topology_change: - priv->topology_change = mesg->content.normal.flag; - break; - case l_flush_complete: - lec_flush_complete(priv, mesg->content.normal.flag); - break; - case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ - spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mesg->content.normal.mac_addr); - lec_arp_remove(priv, entry); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - - if (mesg->content.normal.no_source_le_narp) - break; - fallthrough; - case l_arp_update: - lec_arp_update(priv, mesg->content.normal.mac_addr, - mesg->content.normal.atm_addr, - mesg->content.normal.flag, - mesg->content.normal.targetless_le_arp); - pr_debug("in l_arp_update\n"); - if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - pr_debug("LANE2 3.1.5, got tlvs, size %d\n", - mesg->sizeoftlvs); - 
lane2_associate_ind(dev, mesg->content.normal.mac_addr, - tmp, mesg->sizeoftlvs); - } - break; - case l_config: - priv->maximum_unknown_frame_count = - mesg->content.config.maximum_unknown_frame_count; - priv->max_unknown_frame_time = - (mesg->content.config.max_unknown_frame_time * HZ); - priv->max_retry_count = mesg->content.config.max_retry_count; - priv->aging_time = (mesg->content.config.aging_time * HZ); - priv->forward_delay_time = - (mesg->content.config.forward_delay_time * HZ); - priv->arp_response_time = - (mesg->content.config.arp_response_time * HZ); - priv->flush_timeout = (mesg->content.config.flush_timeout * HZ); - priv->path_switching_delay = - (mesg->content.config.path_switching_delay * HZ); - priv->lane_version = mesg->content.config.lane_version; - /* LANE2 */ - priv->lane2_ops = NULL; - if (priv->lane_version > 1) - priv->lane2_ops = &lane2_ops; - rtnl_lock(); - if (dev_set_mtu(dev, mesg->content.config.mtu)) - pr_info("%s: change_mtu to %d failed\n", - dev->name, mesg->content.config.mtu); - rtnl_unlock(); - priv->is_proxy = mesg->content.config.is_proxy; - break; - case l_flush_tran_id: - lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_set_lecid: - priv->lecid = - (unsigned short)(0xffff & mesg->content.normal.flag); - break; - case l_should_bridge: -#if IS_ENABLED(CONFIG_BRIDGE) - { - pr_debug("%s: bridge zeppelin asks about %pM\n", - dev->name, mesg->content.proxy.mac_addr); - - if (br_fdb_test_addr_hook == NULL) - break; - - if (br_fdb_test_addr_hook(dev, mesg->content.proxy.mac_addr)) { - /* hit from bridge table, send LE_ARP_RESPONSE */ - struct sk_buff *skb2; - struct sock *sk; - - pr_debug("%s: entry found, responding to zeppelin\n", - dev->name); - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) - break; - skb2->len = sizeof(struct atmlec_msg); - skb_copy_to_linear_data(skb2, mesg, sizeof(*mesg)); - struct atm_vcc *vcc; - - rcu_read_lock(); - vcc 
= rcu_dereference(priv->lecd); - if (vcc) { - atm_force_charge(vcc, skb2->truesize); - sk = sk_atm(vcc); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); - } else { - dev_kfree_skb(skb2); - } - rcu_read_unlock(); - } - } -#endif /* IS_ENABLED(CONFIG_BRIDGE) */ - break; - default: - pr_info("%s: Unknown message type %d\n", dev->name, mesg->type); - dev_kfree_skb(skb); - return -EINVAL; - } - dev_kfree_skb(skb); - return 0; -} - -static void lec_atm_close(struct atm_vcc *vcc) -{ - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = netdev_priv(dev); - - rcu_assign_pointer(priv->lecd, NULL); - synchronize_rcu(); - /* Do something needful? */ - - netif_stop_queue(dev); - lec_arp_destroy(priv); - - pr_info("%s: Shut down!\n", dev->name); - module_put(THIS_MODULE); -} - -static const struct atmdev_ops lecdev_ops = { - .close = lec_atm_close, - .send = lec_atm_send -}; - -static struct atm_dev lecatm_dev = { - .ops = &lecdev_ops, - .type = "lec", - .number = 999, /* dummy device number */ - .lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock) -}; - -/* - * LANE2: new argument struct sk_buff *data contains - * the LE_ARP based TLVs introduced in the LANE2 spec - */ -static int -send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, - const unsigned char *mac_addr, const unsigned char *atm_addr, - struct sk_buff *data) -{ - struct atm_vcc *vcc; - struct sock *sk; - struct sk_buff *skb; - struct atmlec_msg *mesg; - - if (!priv || !rcu_access_pointer(priv->lecd)) - return -1; - - skb = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (!skb) - return -1; - skb->len = sizeof(struct atmlec_msg); - mesg = (struct atmlec_msg *)skb->data; - memset(mesg, 0, sizeof(struct atmlec_msg)); - mesg->type = type; - if (data != NULL) - mesg->sizeoftlvs = data->len; - if (mac_addr) - ether_addr_copy(mesg->content.normal.mac_addr, mac_addr); - else - mesg->content.normal.targetless_le_arp = 1; - if (atm_addr) - 
memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN); - - rcu_read_lock(); - vcc = rcu_dereference(priv->lecd); - if (!vcc) { - rcu_read_unlock(); - kfree_skb(skb); - return -1; - } - - atm_force_charge(vcc, skb->truesize); - sk = sk_atm(vcc); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - - if (data != NULL) { - pr_debug("about to send %d bytes of data\n", data->len); - atm_force_charge(vcc, data->truesize); - skb_queue_tail(&sk->sk_receive_queue, data); - sk->sk_data_ready(sk); - } - - rcu_read_unlock(); - return 0; -} - -static void lec_set_multicast_list(struct net_device *dev) -{ - /* - * by default, all multicast frames arrive over the bus. - * eventually support selective multicast service - */ -} - -static const struct net_device_ops lec_netdev_ops = { - .ndo_open = lec_open, - .ndo_stop = lec_close, - .ndo_start_xmit = lec_start_xmit, - .ndo_tx_timeout = lec_tx_timeout, - .ndo_set_rx_mode = lec_set_multicast_list, -}; - -static const unsigned char lec_ctrl_magic[] = { - 0xff, - 0x00, - 0x01, - 0x01 -}; - -#define LEC_DATA_DIRECT_8023 2 -#define LEC_DATA_DIRECT_8025 3 - -static int lec_is_data_direct(struct atm_vcc *vcc) -{ - return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) || - (vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025)); -} - -static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) -{ - unsigned long flags; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = netdev_priv(dev); - -#if DUMP_PACKETS > 0 - printk(KERN_DEBUG "%s: vcc vpi:%d vci:%d\n", - dev->name, vcc->vpi, vcc->vci); -#endif - if (!skb) { - pr_debug("%s: null skb\n", dev->name); - lec_vcc_close(priv, vcc); - return; - } -#if DUMP_PACKETS >= 2 -#define MAX_SKB_DUMP 99 -#elif DUMP_PACKETS >= 1 -#define MAX_SKB_DUMP 30 -#endif -#if DUMP_PACKETS > 0 - printk(KERN_DEBUG "%s: rcv datalen:%ld lecid:%4.4x\n", - dev->name, skb->len, priv->lecid); - print_hex_dump(KERN_DEBUG, "", 
DUMP_OFFSET, 16, 1, - skb->data, min(MAX_SKB_DUMP, skb->len), true); -#endif /* DUMP_PACKETS > 0 */ - if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { - /* Control frame, to daemon */ - struct sock *sk = sk_atm(vcc); - - pr_debug("%s: To daemon\n", dev->name); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - } else { /* Data frame, queue to protocol handlers */ - struct lec_arp_table *entry; - unsigned char *src, *dst; - - atm_return(vcc, skb->truesize); - if (*(__be16 *) skb->data == htons(priv->lecid) || - !rcu_access_pointer(priv->lecd) || !(dev->flags & IFF_UP)) { - /* - * Probably looping back, or if lecd is missing, - * lecd has gone down - */ - pr_debug("Ignoring frame...\n"); - dev_kfree_skb(skb); - return; - } - dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; - - /* - * If this is a Data Direct VCC, and the VCC does not match - * the LE_ARP cache entry, delete the LE_ARP cache entry. - */ - spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (lec_is_data_direct(vcc)) { - src = ((struct lecdatahdr_8023 *)skb->data)->h_source; - entry = lec_arp_find(priv, src); - if (entry && entry->vcc != vcc) { - lec_arp_remove(priv, entry); - lec_arp_put(entry); - } - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - - if (!(dst[0] & 0x01) && /* Never filter Multi/Broadcast */ - !priv->is_proxy && /* Proxy wants all the packets */ - memcmp(dst, dev->dev_addr, dev->addr_len)) { - dev_kfree_skb(skb); - return; - } - if (!hlist_empty(&priv->lec_arp_empty_ones)) - lec_arp_check_empties(priv, vcc, skb); - skb_pull(skb, 2); /* skip lec_id */ - skb->protocol = eth_type_trans(skb, dev); - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; - memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); - netif_rx(skb); - } -} - -static void lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - struct net_device *dev = skb->dev; - - if (vpriv == NULL) { - pr_info("vpriv = NULL!?!?!?\n"); - 
return; - } - - vpriv->old_pop(vcc, skb); - - if (vpriv->xoff && atm_may_send(vcc, 0)) { - vpriv->xoff = 0; - if (netif_running(dev) && netif_queue_stopped(dev)) - netif_wake_queue(dev); - } -} - -static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) -{ - struct lec_vcc_priv *vpriv; - int bytes_left; - struct atmlec_ioc ioc_data; - - lockdep_assert_held(&lec_mutex); - /* Lecd must be up in this case */ - bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); - if (bytes_left != 0) - pr_info("copy from user failed for %d bytes\n", bytes_left); - if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF) - return -EINVAL; - ioc_data.dev_num = array_index_nospec(ioc_data.dev_num, MAX_LEC_ITF); - if (!dev_lec[ioc_data.dev_num]) - return -EINVAL; - vpriv = kmalloc_obj(struct lec_vcc_priv); - if (!vpriv) - return -ENOMEM; - vpriv->xoff = 0; - vpriv->old_pop = vcc->pop; - vcc->user_back = vpriv; - vcc->pop = lec_pop; - lec_vcc_added(netdev_priv(dev_lec[ioc_data.dev_num]), - &ioc_data, vcc, vcc->push); - vcc->proto_data = dev_lec[ioc_data.dev_num]; - vcc->push = lec_push; - return 0; -} - -static int lec_mcast_attach(struct atm_vcc *vcc, int arg) -{ - lockdep_assert_held(&lec_mutex); - if (arg < 0 || arg >= MAX_LEC_ITF) - return -EINVAL; - arg = array_index_nospec(arg, MAX_LEC_ITF); - if (!dev_lec[arg]) - return -EINVAL; - vcc->proto_data = dev_lec[arg]; - return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); -} - -/* Initialize device. 
*/ -static int lecd_attach(struct atm_vcc *vcc, int arg) -{ - int i; - struct lec_priv *priv; - - lockdep_assert_held(&lec_mutex); - if (arg < 0) - arg = 0; - if (arg >= MAX_LEC_ITF) - return -EINVAL; - i = array_index_nospec(arg, MAX_LEC_ITF); - if (!dev_lec[i]) { - int size; - - size = sizeof(struct lec_priv); - dev_lec[i] = alloc_etherdev(size); - if (!dev_lec[i]) - return -ENOMEM; - dev_lec[i]->netdev_ops = &lec_netdev_ops; - dev_lec[i]->max_mtu = 18190; - snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); - if (register_netdev(dev_lec[i])) { - free_netdev(dev_lec[i]); - dev_lec[i] = NULL; - return -EINVAL; - } - - priv = netdev_priv(dev_lec[i]); - } else { - priv = netdev_priv(dev_lec[i]); - if (rcu_access_pointer(priv->lecd)) - return -EADDRINUSE; - } - lec_arp_init(priv); - priv->itfnum = i; /* LANE2 addition */ - rcu_assign_pointer(priv->lecd, vcc); - vcc->dev = &lecatm_dev; - vcc_insert_socket(sk_atm(vcc)); - - vcc->proto_data = dev_lec[i]; - set_bit(ATM_VF_META, &vcc->flags); - set_bit(ATM_VF_READY, &vcc->flags); - - /* Set default values to these variables */ - priv->maximum_unknown_frame_count = 1; - priv->max_unknown_frame_time = (1 * HZ); - priv->vcc_timeout_period = (1200 * HZ); - priv->max_retry_count = 1; - priv->aging_time = (300 * HZ); - priv->forward_delay_time = (15 * HZ); - priv->topology_change = 0; - priv->arp_response_time = (1 * HZ); - priv->flush_timeout = (4 * HZ); - priv->path_switching_delay = (6 * HZ); - - if (dev_lec[i]->flags & IFF_UP) - netif_start_queue(dev_lec[i]); - __module_get(THIS_MODULE); - return i; -} - -#ifdef CONFIG_PROC_FS -static const char *lec_arp_get_status_string(unsigned char status) -{ - static const char *const lec_arp_status_string[] = { - "ESI_UNKNOWN ", - "ESI_ARP_PENDING ", - "ESI_VC_PENDING ", - "<Undefined> ", - "ESI_FLUSH_PENDING ", - "ESI_FORWARD_DIRECT" - }; - - if (status > ESI_FORWARD_DIRECT) - status = 3; /* ESI_UNDEFINED */ - return lec_arp_status_string[status]; -} - -static void lec_info(struct 
seq_file *seq, struct lec_arp_table *entry) -{ - seq_printf(seq, "%pM ", entry->mac_addr); - seq_printf(seq, "%*phN ", ATM_ESA_LEN, entry->atm_addr); - seq_printf(seq, "%s %4.4x", lec_arp_get_status_string(entry->status), - entry->flags & 0xffff); - if (entry->vcc) - seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci); - else - seq_printf(seq, " "); - if (entry->recv_vcc) { - seq_printf(seq, " %3d %3d", entry->recv_vcc->vpi, - entry->recv_vcc->vci); - } - seq_putc(seq, '\n'); -} - -struct lec_state { - unsigned long flags; - struct lec_priv *locked; - struct hlist_node *node; - struct net_device *dev; - int itf; - int arp_table; - int misc_table; -}; - -static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, - loff_t *l) -{ - struct hlist_node *e = state->node; - - if (!e) - e = tbl->first; - if (e == SEQ_START_TOKEN) { - e = tbl->first; - --*l; - } - - for (; e; e = e->next) { - if (--*l < 0) - break; - } - state->node = e; - - return (*l < 0) ? state : NULL; -} - -static void *lec_arp_walk(struct lec_state *state, loff_t *l, - struct lec_priv *priv) -{ - void *v = NULL; - int p; - - for (p = state->arp_table; p < LEC_ARP_TABLE_SIZE; p++) { - v = lec_tbl_walk(state, &priv->lec_arp_tables[p], l); - if (v) - break; - } - state->arp_table = p; - return v; -} - -static void *lec_misc_walk(struct lec_state *state, loff_t *l, - struct lec_priv *priv) -{ - struct hlist_head *lec_misc_tables[] = { - &priv->lec_arp_empty_ones, - &priv->lec_no_forward, - &priv->mcast_fwds - }; - void *v = NULL; - int q; - - for (q = state->misc_table; q < ARRAY_SIZE(lec_misc_tables); q++) { - v = lec_tbl_walk(state, lec_misc_tables[q], l); - if (v) - break; - } - state->misc_table = q; - return v; -} - -static void *lec_priv_walk(struct lec_state *state, loff_t *l, - struct lec_priv *priv) -{ - if (!state->locked) { - state->locked = priv; - spin_lock_irqsave(&priv->lec_arp_lock, state->flags); - } - if (!lec_arp_walk(state, l, priv) && !lec_misc_walk(state, l, 
priv)) { - spin_unlock_irqrestore(&priv->lec_arp_lock, state->flags); - state->locked = NULL; - /* Partial state reset for the next time we get called */ - state->arp_table = state->misc_table = 0; - } - return state->locked; -} - -static void *lec_itf_walk(struct lec_state *state, loff_t *l) -{ - struct net_device *dev; - void *v; - - dev = state->dev ? state->dev : dev_lec[state->itf]; - v = (dev && netdev_priv(dev)) ? - lec_priv_walk(state, l, netdev_priv(dev)) : NULL; - if (!v && dev) { - /* Partial state reset for the next time we get called */ - dev = NULL; - } - state->dev = dev; - return v; -} - -static void *lec_get_idx(struct lec_state *state, loff_t l) -{ - void *v = NULL; - - for (; state->itf < MAX_LEC_ITF; state->itf++) { - v = lec_itf_walk(state, &l); - if (v) - break; - } - return v; -} - -static void *lec_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct lec_state *state = seq->private; - - mutex_lock(&lec_mutex); - state->itf = 0; - state->dev = NULL; - state->locked = NULL; - state->arp_table = 0; - state->misc_table = 0; - state->node = SEQ_START_TOKEN; - - return *pos ? 
lec_get_idx(state, *pos) : SEQ_START_TOKEN; -} - -static void lec_seq_stop(struct seq_file *seq, void *v) -{ - struct lec_state *state = seq->private; - - if (state->dev) { - spin_unlock_irqrestore(&state->locked->lec_arp_lock, - state->flags); - state->dev = NULL; - } - mutex_unlock(&lec_mutex); -} - -static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct lec_state *state = seq->private; - - ++*pos; - return lec_get_idx(state, 1); -} - -static int lec_seq_show(struct seq_file *seq, void *v) -{ - static const char lec_banner[] = - "Itf MAC ATM destination" - " Status Flags " - "VPI/VCI Recv VPI/VCI\n"; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, lec_banner); - else { - struct lec_state *state = seq->private; - struct net_device *dev = state->dev; - struct lec_arp_table *entry = hlist_entry(state->node, - struct lec_arp_table, - next); - - seq_printf(seq, "%s ", dev->name); - lec_info(seq, entry); - } - return 0; -} - -static const struct seq_operations lec_seq_ops = { - .start = lec_seq_start, - .next = lec_seq_next, - .stop = lec_seq_stop, - .show = lec_seq_show, -}; -#endif - -static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct atm_vcc *vcc = ATM_SD(sock); - int err = 0; - - switch (cmd) { - case ATMLEC_CTRL: - case ATMLEC_MCAST: - case ATMLEC_DATA: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - default: - return -ENOIOCTLCMD; - } - - mutex_lock(&lec_mutex); - switch (cmd) { - case ATMLEC_CTRL: - err = lecd_attach(vcc, (int)arg); - if (err >= 0) - sock->state = SS_CONNECTED; - break; - case ATMLEC_MCAST: - err = lec_mcast_attach(vcc, (int)arg); - break; - case ATMLEC_DATA: - err = lec_vcc_attach(vcc, (void __user *)arg); - break; - } - - mutex_unlock(&lec_mutex); - return err; -} - -static struct atm_ioctl lane_ioctl_ops = { - .owner = THIS_MODULE, - .ioctl = lane_ioctl, -}; - -static int __init lane_module_init(void) -{ -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *p; - - p = 
proc_create_seq_private("lec", 0444, atm_proc_root, &lec_seq_ops, - sizeof(struct lec_state), NULL); - if (!p) { - pr_err("Unable to initialize /proc/net/atm/lec\n"); - return -ENOMEM; - } -#endif - - register_atm_ioctl(&lane_ioctl_ops); - pr_info("lec.c: initialized\n"); - return 0; -} - -static void __exit lane_module_cleanup(void) -{ - int i; - -#ifdef CONFIG_PROC_FS - remove_proc_entry("lec", atm_proc_root); -#endif - - deregister_atm_ioctl(&lane_ioctl_ops); - - for (i = 0; i < MAX_LEC_ITF; i++) { - if (dev_lec[i] != NULL) { - unregister_netdev(dev_lec[i]); - free_netdev(dev_lec[i]); - dev_lec[i] = NULL; - } - } -} - -module_init(lane_module_init); -module_exit(lane_module_cleanup); - -/* - * LANE2: 3.1.3, LE_RESOLVE.request - * Non force allocates memory and fills in *tlvs, fills in *sizeoftlvs. - * If sizeoftlvs == NULL the default TLVs associated with this - * lec will be used. - * If dst_mac == NULL, targetless LE_ARP will be sent - */ -static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs) -{ - unsigned long flags; - struct lec_priv *priv = netdev_priv(dev); - struct lec_arp_table *table; - struct sk_buff *skb; - int retval; - - if (force == 0) { - spin_lock_irqsave(&priv->lec_arp_lock, flags); - table = lec_arp_find(priv, dst_mac); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if (table == NULL) - return -1; - - *tlvs = kmemdup(table->tlvs, table->sizeoftlvs, GFP_ATOMIC); - if (*tlvs == NULL) - return -1; - - *sizeoftlvs = table->sizeoftlvs; - - return 0; - } - - if (sizeoftlvs == NULL) - retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, NULL); - - else { - skb = alloc_skb(*sizeoftlvs, GFP_ATOMIC); - if (skb == NULL) - return -1; - skb->len = *sizeoftlvs; - skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs); - retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb); - } - return retval; -} - -/* - * LANE2: 3.1.4, LE_ASSOCIATE.request - * Associate the *tlvs with the *lan_dst address. 
- * Will overwrite any previous association - * Returns 1 for success, 0 for failure (out of memory) - * - */ -static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, - const u8 *tlvs, u32 sizeoftlvs) -{ - int retval; - struct sk_buff *skb; - struct lec_priv *priv = netdev_priv(dev); - - if (!ether_addr_equal(lan_dst, dev->dev_addr)) - return 0; /* not our mac address */ - - kfree(priv->tlvs); /* NULL if there was no previous association */ - - priv->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL); - if (priv->tlvs == NULL) - return 0; - priv->sizeoftlvs = sizeoftlvs; - - skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); - if (skb == NULL) - return 0; - skb->len = sizeoftlvs; - skb_copy_to_linear_data(skb, tlvs, sizeoftlvs); - retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); - if (retval != 0) - pr_info("lec.c: lane2_associate_req() failed\n"); - /* - * If the previous association has changed we must - * somehow notify other LANE entities about the change - */ - return 1; -} - -/* - * LANE2: 3.1.5, LE_ASSOCIATE.indication - * - */ -static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, - const u8 *tlvs, u32 sizeoftlvs) -{ -#if 0 - int i = 0; -#endif - struct lec_priv *priv = netdev_priv(dev); -#if 0 /* - * Why have the TLVs in LE_ARP entries - * since we do not use them? 
When you - * uncomment this code, make sure the - * TLVs get freed when entry is killed - */ - struct lec_arp_table *entry = lec_arp_find(priv, mac_addr); - - if (entry == NULL) - return; /* should not happen */ - - kfree(entry->tlvs); - - entry->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL); - if (entry->tlvs == NULL) - return; - entry->sizeoftlvs = sizeoftlvs; -#endif -#if 0 - pr_info("\n"); - pr_info("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); - while (i < sizeoftlvs) - pr_cont("%02x ", tlvs[i++]); - - pr_cont("\n"); -#endif - - /* tell MPOA about the TLVs we saw */ - if (priv->lane2_ops && priv->lane2_ops->associate_indicator) { - priv->lane2_ops->associate_indicator(dev, mac_addr, - tlvs, sizeoftlvs); - } -} - -/* - * Here starts what used to lec_arpc.c - * - * lec_arpc.c was added here when making - * lane client modular. October 1997 - */ - -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/param.h> -#include <linux/atomic.h> -#include <linux/inetdevice.h> -#include <net/route.h> - -#if 0 -#define pr_debug(format, args...) 
-/* - #define pr_debug printk -*/ -#endif -#define DEBUG_ARP_TABLE 0 - -#define LEC_ARP_REFRESH_INTERVAL (3*HZ) - -static void lec_arp_check_expire(struct work_struct *work); -static void lec_arp_expire_arp(struct timer_list *t); - -/* - * Arp table funcs - */ - -#define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE - 1)) - -/* - * Initialization of arp-cache - */ -static void lec_arp_init(struct lec_priv *priv) -{ - unsigned short i; - - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); - INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - INIT_HLIST_HEAD(&priv->lec_no_forward); - INIT_HLIST_HEAD(&priv->mcast_fwds); - spin_lock_init(&priv->lec_arp_lock); - INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire); - schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); -} - -static void lec_arp_clear_vccs(struct lec_arp_table *entry) -{ - if (entry->vcc) { - struct atm_vcc *vcc = entry->vcc; - struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - struct net_device *dev = (struct net_device *)vcc->proto_data; - - if (vpriv) { - vcc->pop = vpriv->old_pop; - if (vpriv->xoff) - netif_wake_queue(dev); - kfree(vpriv); - vcc->user_back = NULL; - vcc->push = entry->old_push; - vcc_release_async(vcc, -EPIPE); - } - entry->vcc = NULL; - } - if (entry->recv_vcc) { - struct atm_vcc *vcc = entry->recv_vcc; - struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - - if (vpriv) { - kfree(vpriv); - vcc->user_back = NULL; - - entry->recv_vcc->push = entry->old_recv_push; - vcc_release_async(entry->recv_vcc, -EPIPE); - } - entry->recv_vcc = NULL; - } -} - -/* - * Insert entry to lec_arp_table - * LANE2: Add to the end of the list to satisfy 8.1.13 - */ -static inline void -lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) -{ - struct hlist_head *tmp; - - tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; - hlist_add_head(&entry->next, tmp); - - pr_debug("Added entry:%pM\n", entry->mac_addr); -} - -/* - * Remove entry from 
lec_arp_table - */ -static int -lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) -{ - struct lec_arp_table *entry; - int i, remove_vcc = 1; - - if (!to_remove) - return -1; - - hlist_del(&to_remove->next); - timer_delete(&to_remove->timer); - - /* - * If this is the only MAC connected to this VCC, - * also tear down the VCC - */ - if (to_remove->status >= ESI_FLUSH_PENDING) { - /* - * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT - */ - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, - &priv->lec_arp_tables[i], next) { - if (memcmp(to_remove->atm_addr, - entry->atm_addr, ATM_ESA_LEN) == 0) { - remove_vcc = 0; - break; - } - } - } - if (remove_vcc) - lec_arp_clear_vccs(to_remove); - } - skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - - pr_debug("Removed entry:%pM\n", to_remove->mac_addr); - return 0; -} - -#if DEBUG_ARP_TABLE -static const char *get_status_string(unsigned char st) -{ - switch (st) { - case ESI_UNKNOWN: - return "ESI_UNKNOWN"; - case ESI_ARP_PENDING: - return "ESI_ARP_PENDING"; - case ESI_VC_PENDING: - return "ESI_VC_PENDING"; - case ESI_FLUSH_PENDING: - return "ESI_FLUSH_PENDING"; - case ESI_FORWARD_DIRECT: - return "ESI_FORWARD_DIRECT"; - } - return "<UNKNOWN>"; -} - -static void dump_arp_table(struct lec_priv *priv) -{ - struct lec_arp_table *rulla; - char buf[256]; - int i, offset; - - pr_info("Dump %p:\n", priv); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, - &priv->lec_arp_tables[i], next) { - offset = 0; - offset += sprintf(buf, "%d: %p\n", i, rulla); - offset += sprintf(buf + offset, "Mac: %pM ", - rulla->mac_addr); - offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN, - rulla->atm_addr); - offset += sprintf(buf + offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc ? rulla->vcc->vpi : 0, - rulla->vcc ? rulla->vcc->vci : 0, - rulla->recv_vcc ? 
rulla->recv_vcc-> - vpi : 0, - rulla->recv_vcc ? rulla->recv_vcc-> - vci : 0, rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset += - sprintf(buf + offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - pr_info("%s\n", buf); - } - } - - if (!hlist_empty(&priv->lec_no_forward)) - pr_info("No forward\n"); - hlist_for_each_entry(rulla, &priv->lec_no_forward, next) { - offset = 0; - offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr); - offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN, - rulla->atm_addr); - offset += sprintf(buf + offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc ? rulla->vcc->vpi : 0, - rulla->vcc ? rulla->vcc->vci : 0, - rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, - rulla->recv_vcc ? rulla->recv_vcc->vci : 0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset += sprintf(buf + offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - pr_info("%s\n", buf); - } - - if (!hlist_empty(&priv->lec_arp_empty_ones)) - pr_info("Empty ones\n"); - hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) { - offset = 0; - offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr); - offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN, - rulla->atm_addr); - offset += sprintf(buf + offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc ? rulla->vcc->vpi : 0, - rulla->vcc ? rulla->vcc->vci : 0, - rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, - rulla->recv_vcc ? 
rulla->recv_vcc->vci : 0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset += sprintf(buf + offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - pr_info("%s", buf); - } - - if (!hlist_empty(&priv->mcast_fwds)) - pr_info("Multicast Forward VCCs\n"); - hlist_for_each_entry(rulla, &priv->mcast_fwds, next) { - offset = 0; - offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr); - offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN, - rulla->atm_addr); - offset += sprintf(buf + offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc ? rulla->vcc->vpi : 0, - rulla->vcc ? rulla->vcc->vci : 0, - rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, - rulla->recv_vcc ? rulla->recv_vcc->vci : 0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset += sprintf(buf + offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - pr_info("%s\n", buf); - } - -} -#else -#define dump_arp_table(priv) do { } while (0) -#endif - -/* - * Destruction of arp-cache - */ -static void lec_arp_destroy(struct lec_priv *priv) -{ - unsigned long flags; - struct hlist_node *next; - struct lec_arp_table *entry; - int i; - - cancel_delayed_work_sync(&priv->lec_arp_work); - - /* - * Remove all entries - */ - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_tables[i], next) { - lec_arp_remove(priv, entry); - lec_arp_put(entry); - } - INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); - } - - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_empty_ones, next) { - timer_delete_sync(&entry->timer); - lec_arp_clear_vccs(entry); - hlist_del(&entry->next); - lec_arp_put(entry); - } - INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - - hlist_for_each_entry_safe(entry, 
next, - &priv->lec_no_forward, next) { - timer_delete_sync(&entry->timer); - lec_arp_clear_vccs(entry); - hlist_del(&entry->next); - lec_arp_put(entry); - } - INIT_HLIST_HEAD(&priv->lec_no_forward); - - hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - lec_arp_clear_vccs(entry); - hlist_del(&entry->next); - lec_arp_put(entry); - } - INIT_HLIST_HEAD(&priv->mcast_fwds); - priv->mcast_vcc = NULL; - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); -} - -/* - * Find entry by mac_address - */ -static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - const unsigned char *mac_addr) -{ - struct hlist_head *head; - struct lec_arp_table *entry; - - pr_debug("%pM\n", mac_addr); - - head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; - hlist_for_each_entry(entry, head, next) { - if (ether_addr_equal(mac_addr, entry->mac_addr)) - return entry; - } - return NULL; -} - -static struct lec_arp_table *make_entry(struct lec_priv *priv, - const unsigned char *mac_addr) -{ - struct lec_arp_table *to_return; - - to_return = kzalloc_obj(struct lec_arp_table, GFP_ATOMIC); - if (!to_return) - return NULL; - ether_addr_copy(to_return->mac_addr, mac_addr); - INIT_HLIST_NODE(&to_return->next); - timer_setup(&to_return->timer, lec_arp_expire_arp, 0); - to_return->last_used = jiffies; - to_return->priv = priv; - skb_queue_head_init(&to_return->tx_wait); - refcount_set(&to_return->usage, 1); - return to_return; -} - -/* Arp sent timer expired */ -static void lec_arp_expire_arp(struct timer_list *t) -{ - struct lec_arp_table *entry; - - entry = timer_container_of(entry, t, timer); - - pr_debug("\n"); - if (entry->status == ESI_ARP_PENDING) { - if (entry->no_tries <= entry->priv->max_retry_count) { - if (entry->is_rdesc) - send_to_lecd(entry->priv, l_rdesc_arp_xmt, - entry->mac_addr, NULL, NULL); - else - send_to_lecd(entry->priv, l_arp_xmt, - entry->mac_addr, NULL, NULL); - entry->no_tries++; - } - 
mod_timer(&entry->timer, jiffies + (1 * HZ)); - } -} - -/* Unknown/unused vcc expire, remove associated entry */ -static void lec_arp_expire_vcc(struct timer_list *t) -{ - unsigned long flags; - struct lec_arp_table *to_remove = timer_container_of(to_remove, t, - timer); - struct lec_priv *priv = to_remove->priv; - - timer_delete(&to_remove->timer); - - pr_debug("%p %p: vpi:%d vci:%d\n", - to_remove, priv, - to_remove->vcc ? to_remove->recv_vcc->vpi : 0, - to_remove->vcc ? to_remove->recv_vcc->vci : 0); - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_del(&to_remove->next); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - - lec_arp_clear_vccs(to_remove); - lec_arp_put(to_remove); -} - -static bool __lec_arp_check_expire(struct lec_arp_table *entry, - unsigned long now, - struct lec_priv *priv) -{ - unsigned long time_to_check; - - if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) - time_to_check = priv->forward_delay_time; - else - time_to_check = priv->aging_time; - - pr_debug("About to expire: %lx - %lx > %lx\n", - now, entry->last_used, time_to_check); - if (time_after(now, entry->last_used + time_to_check) && - !(entry->flags & LEC_PERMANENT_FLAG) && - !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ - /* Remove entry */ - pr_debug("Entry timed out\n"); - lec_arp_remove(priv, entry); - lec_arp_put(entry); - } else { - /* Something else */ - if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) && - time_after_eq(now, entry->timestamp + - priv->max_unknown_frame_time)) { - entry->timestamp = jiffies; - entry->packets_flooded = 0; - if (entry->status == ESI_VC_PENDING) - send_to_lecd(priv, l_svc_setup, - entry->mac_addr, - entry->atm_addr, - NULL); - } - if (entry->status == ESI_FLUSH_PENDING && - time_after_eq(now, entry->timestamp + - priv->path_switching_delay)) { - lec_arp_hold(entry); - return true; - } - } - - return false; -} -/* - * Expire entries. - * 1. Re-set timer - * 2. 
For each entry, delete entries that have aged past the age limit. - * 3. For each entry, depending on the status of the entry, perform - * the following maintenance. - * a. If status is ESI_VC_PENDING or ESI_ARP_PENDING then if the - * tick_count is above the max_unknown_frame_time, clear - * the tick_count to zero and clear the packets_flooded counter - * to zero. This supports the packet rate limit per address - * while flooding unknowns. - * b. If the status is ESI_FLUSH_PENDING and the tick_count is greater - * than or equal to the path_switching_delay, change the status - * to ESI_FORWARD_DIRECT. This causes the flush period to end - * regardless of the progress of the flush protocol. - */ -static void lec_arp_check_expire(struct work_struct *work) -{ - unsigned long flags; - struct lec_priv *priv = - container_of(work, struct lec_priv, lec_arp_work.work); - struct hlist_node *next; - struct lec_arp_table *entry; - unsigned long now; - int i; - - pr_debug("%p\n", priv); - now = jiffies; -restart: - spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_tables[i], next) { - if (__lec_arp_check_expire(entry, now, priv)) { - struct sk_buff *skb; - struct atm_vcc *vcc = entry->vcc; - - spin_unlock_irqrestore(&priv->lec_arp_lock, - flags); - while ((skb = skb_dequeue(&entry->tx_wait))) - lec_send(vcc, skb); - entry->last_used = jiffies; - entry->status = ESI_FORWARD_DIRECT; - lec_arp_put(entry); - - goto restart; - } - } - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - - schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); -} - -/* - * Try to find vcc where mac_address is attached. 
- * - */ -static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - const unsigned char *mac_to_find, - int is_rdesc, - struct lec_arp_table **ret_entry) -{ - unsigned long flags; - struct lec_arp_table *entry; - struct atm_vcc *found; - - if (mac_to_find[0] & 0x01) { - switch (priv->lane_version) { - case 1: - return priv->mcast_vcc; - case 2: /* LANE2 wants arp for multicast addresses */ - if (ether_addr_equal(mac_to_find, bus_mac)) - return priv->mcast_vcc; - break; - default: - break; - } - } - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_to_find); - - if (entry) { - if (entry->status == ESI_FORWARD_DIRECT) { - /* Connection Ok */ - entry->last_used = jiffies; - lec_arp_hold(entry); - *ret_entry = entry; - found = entry->vcc; - goto out; - } - /* - * If the LE_ARP cache entry is still pending, reset count to 0 - * so another LE_ARP request can be made for this frame. - */ - if (entry->status == ESI_ARP_PENDING) - entry->no_tries = 0; - /* - * Data direct VC not yet set up, check to see if the unknown - * frame count is greater than the limit. If the limit has - * not been reached, allow the caller to send packet to - * BUS. - */ - if (entry->status != ESI_FLUSH_PENDING && - entry->packets_flooded < - priv->maximum_unknown_frame_count) { - entry->packets_flooded++; - pr_debug("Flooding..\n"); - found = priv->mcast_vcc; - goto out; - } - /* - * We got here because entry->status == ESI_FLUSH_PENDING - * or BUS flood limit was reached for an entry which is - * in ESI_ARP_PENDING or ESI_VC_PENDING state. 
- */ - lec_arp_hold(entry); - *ret_entry = entry; - pr_debug("entry->status %d entry->vcc %p\n", entry->status, - entry->vcc); - found = NULL; - } else { - /* No matching entry was found */ - entry = make_entry(priv, mac_to_find); - pr_debug("Making entry\n"); - if (!entry) { - found = priv->mcast_vcc; - goto out; - } - lec_arp_add(priv, entry); - /* We want arp-request(s) to be sent */ - entry->packets_flooded = 1; - entry->status = ESI_ARP_PENDING; - entry->no_tries = 1; - entry->last_used = entry->timestamp = jiffies; - entry->is_rdesc = is_rdesc; - if (entry->is_rdesc) - send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, - NULL); - else - send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); - entry->timer.expires = jiffies + (1 * HZ); - entry->timer.function = lec_arp_expire_arp; - add_timer(&entry->timer); - found = priv->mcast_vcc; - } - -out: - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return found; -} - -static int -lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, - unsigned long permanent) -{ - unsigned long flags; - struct hlist_node *next; - struct lec_arp_table *entry; - int i; - - pr_debug("\n"); - spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_tables[i], next) { - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && - (permanent || - !(entry->flags & LEC_PERMANENT_FLAG))) { - lec_arp_remove(priv, entry); - lec_arp_put(entry); - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return 0; - } - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return -1; -} - -/* - * Notifies: Response to arp_request (atm_addr != NULL) - */ -static void -lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, - const unsigned char *atm_addr, unsigned long remoteflag, - unsigned int targetless_le_arp) -{ - unsigned long flags; - struct hlist_node *next; - struct lec_arp_table *entry, *tmp; - int i; - 
- pr_debug("%smac:%pM\n", - (targetless_le_arp) ? "targetless " : "", mac_addr); - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_addr); - if (entry == NULL && targetless_le_arp) - goto out; /* - * LANE2: ignore targetless LE_ARPs for which - * we have no entry in the cache. 7.1.30 - */ - if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_empty_ones, next) { - if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { - hlist_del(&entry->next); - timer_delete(&entry->timer); - tmp = lec_arp_find(priv, mac_addr); - if (tmp) { - timer_delete(&tmp->timer); - tmp->status = ESI_FORWARD_DIRECT; - memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); - tmp->vcc = entry->vcc; - tmp->old_push = entry->old_push; - tmp->last_used = jiffies; - timer_delete(&entry->timer); - lec_arp_put(entry); - entry = tmp; - } else { - entry->status = ESI_FORWARD_DIRECT; - ether_addr_copy(entry->mac_addr, - mac_addr); - entry->last_used = jiffies; - lec_arp_add(priv, entry); - } - if (remoteflag) - entry->flags |= LEC_REMOTE_FLAG; - else - entry->flags &= ~LEC_REMOTE_FLAG; - pr_debug("After update\n"); - dump_arp_table(priv); - goto out; - } - } - } - - entry = lec_arp_find(priv, mac_addr); - if (!entry) { - entry = make_entry(priv, mac_addr); - if (!entry) - goto out; - entry->status = ESI_UNKNOWN; - lec_arp_add(priv, entry); - /* Temporary, changes before end of function */ - } - memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); - timer_delete(&entry->timer); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, - &priv->lec_arp_tables[i], next) { - if (entry != tmp && - !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { - /* Vcc to this host exists */ - if (tmp->status > ESI_VC_PENDING) { - /* - * ESI_FLUSH_PENDING, - * ESI_FORWARD_DIRECT - */ - entry->vcc = tmp->vcc; - entry->old_push = tmp->old_push; - } - entry->status = tmp->status; - break; - } - } - } - if (remoteflag) - entry->flags |= 
LEC_REMOTE_FLAG; - else - entry->flags &= ~LEC_REMOTE_FLAG; - if (entry->status == ESI_ARP_PENDING || entry->status == ESI_UNKNOWN) { - entry->status = ESI_VC_PENDING; - send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); - } - pr_debug("After update2\n"); - dump_arp_table(priv); -out: - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); -} - -/* - * Notifies: Vcc setup ready - */ -static void -lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, - struct atm_vcc *vcc, - void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) -{ - unsigned long flags; - struct lec_arp_table *entry; - int i, found_entry = 0; - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - if (ioc_data->receive == 2) { - pr_debug("LEC_ARP: Attaching mcast forward\n"); -#if 0 - entry = lec_arp_find(priv, bus_mac); - if (!entry) { - pr_info("LEC_ARP: Multicast entry not found!\n"); - goto out; - } - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; -#endif - entry = make_entry(priv, bus_mac); - if (entry == NULL) - goto out; - timer_delete(&entry->timer); - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - hlist_add_head(&entry->next, &priv->mcast_fwds); - goto out; - } else if (ioc_data->receive == 1) { - /* - * Vcc which we don't want to make default vcc, - * attach it anyway. 
- */ - pr_debug("LEC_ARP:Attaching data direct, not default: %*phN\n", - ATM_ESA_LEN, ioc_data->atm_addr); - entry = make_entry(priv, bus_mac); - if (entry == NULL) - goto out; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - eth_zero_addr(entry->mac_addr); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - entry->status = ESI_UNKNOWN; - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - hlist_add_head(&entry->next, &priv->lec_no_forward); - add_timer(&entry->timer); - dump_arp_table(priv); - goto out; - } - pr_debug("LEC_ARP:Attaching data direct, default: %*phN\n", - ATM_ESA_LEN, ioc_data->atm_addr); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, - &priv->lec_arp_tables[i], next) { - if (memcmp - (ioc_data->atm_addr, entry->atm_addr, - ATM_ESA_LEN) == 0) { - pr_debug("LEC_ARP: Attaching data direct\n"); - pr_debug("Currently -> Vcc: %d, Rvcc:%d\n", - entry->vcc ? entry->vcc->vci : 0, - entry->recv_vcc ? entry->recv_vcc-> - vci : 0); - found_entry = 1; - timer_delete(&entry->timer); - entry->vcc = vcc; - entry->old_push = old_push; - if (entry->status == ESI_VC_PENDING) { - if (priv->maximum_unknown_frame_count - == 0) - entry->status = - ESI_FORWARD_DIRECT; - else { - entry->timestamp = jiffies; - entry->status = - ESI_FLUSH_PENDING; -#if 0 - send_to_lecd(priv, l_flush_xmt, - NULL, - entry->atm_addr, - NULL); -#endif - } - } else { - /* - * They were forming a connection - * to us, and we to them. Our - * ATM address is numerically lower - * than theirs, so we make connection - * we formed into default VCC (8.1.11). - * Connection they made gets torn - * down. This might confuse some - * clients. Can be changed if - * someone reports trouble... 
- */ - ; - } - } - } - } - if (found_entry) { - pr_debug("After vcc was added\n"); - dump_arp_table(priv); - goto out; - } - /* - * Not found, snatch address from first data packet that arrives - * from this vcc - */ - entry = make_entry(priv, bus_mac); - if (!entry) - goto out; - entry->vcc = vcc; - entry->old_push = old_push; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - eth_zero_addr(entry->mac_addr); - entry->status = ESI_UNKNOWN; - hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - add_timer(&entry->timer); - pr_debug("After vcc was added\n"); - dump_arp_table(priv); -out: - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); -} - -static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) -{ - unsigned long flags; - struct lec_arp_table *entry; - int i; - - pr_debug("%lx\n", tran_id); -restart: - spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, - &priv->lec_arp_tables[i], next) { - if (entry->flush_tran_id == tran_id && - entry->status == ESI_FLUSH_PENDING) { - struct sk_buff *skb; - struct atm_vcc *vcc = entry->vcc; - - lec_arp_hold(entry); - spin_unlock_irqrestore(&priv->lec_arp_lock, - flags); - while ((skb = skb_dequeue(&entry->tx_wait))) - lec_send(vcc, skb); - entry->last_used = jiffies; - entry->status = ESI_FORWARD_DIRECT; - lec_arp_put(entry); - pr_debug("LEC_ARP: Flushed\n"); - goto restart; - } - } - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - dump_arp_table(priv); -} - -static void -lec_set_flush_tran_id(struct lec_priv *priv, - const unsigned char *atm_addr, unsigned long tran_id) -{ - unsigned long flags; - struct lec_arp_table *entry; - int i; - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, - &priv->lec_arp_tables[i], next) { - if 
(!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { - entry->flush_tran_id = tran_id; - pr_debug("Set flush transaction id to %lx for %p\n", - tran_id, entry); - } - } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); -} - -static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) -{ - unsigned long flags; - unsigned char mac_addr[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - }; - struct lec_arp_table *to_add; - struct lec_vcc_priv *vpriv; - int err = 0; - - vpriv = kmalloc_obj(struct lec_vcc_priv); - if (!vpriv) - return -ENOMEM; - vpriv->xoff = 0; - vpriv->old_pop = vcc->pop; - vcc->user_back = vpriv; - vcc->pop = lec_pop; - spin_lock_irqsave(&priv->lec_arp_lock, flags); - to_add = make_entry(priv, mac_addr); - if (!to_add) { - vcc->pop = vpriv->old_pop; - kfree(vpriv); - err = -ENOMEM; - goto out; - } - memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); - to_add->status = ESI_FORWARD_DIRECT; - to_add->flags |= LEC_PERMANENT_FLAG; - to_add->vcc = vcc; - to_add->old_push = vcc->push; - vcc->push = lec_push; - priv->mcast_vcc = vcc; - lec_arp_add(priv, to_add); -out: - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return err; -} - -static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) -{ - unsigned long flags; - struct hlist_node *next; - struct lec_arp_table *entry; - int i; - - pr_debug("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); - dump_arp_table(priv); - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_tables[i], next) { - if (vcc == entry->vcc) { - lec_arp_remove(priv, entry); - lec_arp_put(entry); - if (priv->mcast_vcc == vcc) - priv->mcast_vcc = NULL; - } - } - } - - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_empty_ones, next) { - if (entry->vcc == vcc) { - lec_arp_clear_vccs(entry); - timer_delete(&entry->timer); - hlist_del(&entry->next); - lec_arp_put(entry); - } - } - 
- hlist_for_each_entry_safe(entry, next, - &priv->lec_no_forward, next) { - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - timer_delete(&entry->timer); - hlist_del(&entry->next); - lec_arp_put(entry); - } - } - - hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - hlist_del(&entry->next); - lec_arp_put(entry); - } - } - - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - dump_arp_table(priv); -} - -static void -lec_arp_check_empties(struct lec_priv *priv, - struct atm_vcc *vcc, struct sk_buff *skb) -{ - unsigned long flags; - struct hlist_node *next; - struct lec_arp_table *entry, *tmp; - struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; - unsigned char *src = hdr->h_source; - - spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, next, - &priv->lec_arp_empty_ones, next) { - if (vcc == entry->vcc) { - timer_delete(&entry->timer); - ether_addr_copy(entry->mac_addr, src); - entry->status = ESI_FORWARD_DIRECT; - entry->last_used = jiffies; - /* We might have got an entry */ - tmp = lec_arp_find(priv, src); - if (tmp) { - lec_arp_remove(priv, tmp); - lec_arp_put(tmp); - } - hlist_del(&entry->next); - lec_arp_add(priv, entry); - goto out; - } - } - pr_debug("LEC_ARP: Arp_check_empties: entry not found!\n"); -out: - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); -} - -MODULE_DESCRIPTION("ATM LAN Emulation (LANE) support"); -MODULE_LICENSE("GPL"); diff --git a/net/atm/lec.h b/net/atm/lec.h deleted file mode 100644 index ec85709bf818..000000000000 --- a/net/atm/lec.h +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Lan Emulation client header file - * - * Marko Kiiskila <mkiiskila@yahoo.com> - */ - -#ifndef _LEC_H_ -#define _LEC_H_ - -#include <linux/atmdev.h> -#include <linux/netdevice.h> -#include <linux/atmlec.h> - -#define LEC_HEADER_LEN 16 - -struct 
lecdatahdr_8023 { - __be16 le_header; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - __be16 h_type; -}; - -struct lecdatahdr_8025 { - __be16 le_header; - unsigned char ac_pad; - unsigned char fc; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; -}; - -#define LEC_MINIMUM_8023_SIZE 62 -#define LEC_MINIMUM_8025_SIZE 16 - -/* - * Operations that LANE2 capable device can do. Two first functions - * are used to make the device do things. See spec 3.1.3 and 3.1.4. - * - * The third function is intended for the MPOA component sitting on - * top of the LANE device. The MPOA component assigns it's own function - * to (*associate_indicator)() and the LANE device will use that - * function to tell about TLVs it sees floating through. - * - */ -struct lane2_ops { - int (*resolve) (struct net_device *dev, const u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req) (struct net_device *dev, const u8 *lan_dst, - const u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator) (struct net_device *dev, const u8 *mac_addr, - const u8 *tlvs, u32 sizeoftlvs); -}; - -/* - * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType - * frames. - * - * 1. Dix Ethernet EtherType frames encoded by placing EtherType - * field in h_type field. Data follows immediately after header. - * 2. LLC Data frames whose total length, including LLC field and data, - * but not padding required to meet the minimum data frame length, - * is less than ETH_P_802_3_MIN MUST be encoded by placing that length - * in the h_type field. The LLC field follows header immediately. - * 3. LLC data frames longer than this maximum MUST be encoded by placing - * the value 0 in the h_type field. 
- * - */ - -/* Hash table size */ -#define LEC_ARP_TABLE_SIZE 16 - -struct lec_priv { - unsigned short lecid; /* Lecid of this client */ - struct hlist_head lec_arp_empty_ones; - /* Used for storing VCC's that don't have a MAC address attached yet */ - struct hlist_head lec_arp_tables[LEC_ARP_TABLE_SIZE]; - /* Actual LE ARP table */ - struct hlist_head lec_no_forward; - /* - * Used for storing VCC's (and forward packets from) which are to - * age out by not using them to forward packets. - * This is because to some LE clients there will be 2 VCCs. Only - * one of them gets used. - */ - struct hlist_head mcast_fwds; - /* - * With LANEv2 it is possible that BUS (or a special multicast server) - * establishes multiple Multicast Forward VCCs to us. This list - * collects all those VCCs. LANEv1 client has only one item in this - * list. These entries are not aged out. - */ - spinlock_t lec_arp_lock; - struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc __rcu *lecd; - struct delayed_work lec_arp_work; /* C10 */ - unsigned int maximum_unknown_frame_count; - /* - * Within the period of time defined by this variable, the client will send - * no more than C10 frames to BUS for a given unicast destination. 
(C11) - */ - unsigned long max_unknown_frame_time; - /* - * If no traffic has been sent in this vcc for this period of time, - * vcc will be torn down (C12) - */ - unsigned long vcc_timeout_period; - /* - * An LE Client MUST not retry an LE_ARP_REQUEST for a - * given frame's LAN Destination more than maximum retry count times, - * after the first LEC_ARP_REQUEST (C13) - */ - unsigned short max_retry_count; - /* - * Max time the client will maintain an entry in its arp cache in - * absence of a verification of that relationship (C17) - */ - unsigned long aging_time; - /* - * Max time the client will maintain an entry in cache when - * topology change flag is true (C18) - */ - unsigned long forward_delay_time; /* Topology change flag (C19) */ - int topology_change; - /* - * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE - * cycle to take (C20) - */ - unsigned long arp_response_time; - /* - * Time limit ot wait to receive an LE_FLUSH_RESPONSE after the - * LE_FLUSH_REQUEST has been sent before taking recover action. (C21) - */ - unsigned long flush_timeout; - /* The time since sending a frame to the bus after which the - * LE Client may assume that the frame has been either discarded or - * delivered to the recipient (C22) - */ - unsigned long path_switching_delay; - - u8 *tlvs; /* LANE2: TLVs are new */ - u32 sizeoftlvs; /* The size of the tlv array in bytes */ - int lane_version; /* LANE2 */ - int itfnum; /* e.g. 
2 for lec2, 5 for lec5 */ - struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ - int is_proxy; /* bridge between ATM and Ethernet */ -}; - -struct lec_vcc_priv { - void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb); - int xoff; -}; - -#define LEC_VCC_PRIV(vcc) ((struct lec_vcc_priv *)((vcc)->user_back)) - -#endif /* _LEC_H_ */ diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h deleted file mode 100644 index 39115fe074c4..000000000000 --- a/net/atm/lec_arpc.h +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Lec arp cache - * - * Marko Kiiskila <mkiiskila@yahoo.com> - */ -#ifndef _LEC_ARP_H_ -#define _LEC_ARP_H_ -#include <linux/atm.h> -#include <linux/atmdev.h> -#include <linux/if_ether.h> -#include <linux/atmlec.h> - -struct lec_arp_table { - struct hlist_node next; /* Linked entry list */ - unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ - unsigned char mac_addr[ETH_ALEN]; /* Mac address */ - int is_rdesc; /* Mac address is a route descriptor */ - struct atm_vcc *vcc; /* Vcc this entry is attached */ - struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - - void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb); - /* Push that leads to daemon */ - - void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb); - /* Push that leads to daemon */ - - unsigned long last_used; /* For expiry */ - unsigned long timestamp; /* Used for various timestamping things: - * 1. FLUSH started - * (status=ESI_FLUSH_PENDING) - * 2. 
Counting to - * max_unknown_frame_time - * (status=ESI_ARP_PENDING|| - * status=ESI_VC_PENDING) - */ - unsigned char no_tries; /* No of times arp retry has been tried */ - unsigned char status; /* Status of this entry */ - unsigned short flags; /* Flags for this entry */ - unsigned short packets_flooded; /* Data packets flooded */ - unsigned long flush_tran_id; /* Transaction id in flush protocol */ - struct timer_list timer; /* Arping timer */ - struct lec_priv *priv; /* Pointer back */ - u8 *tlvs; - u32 sizeoftlvs; /* - * LANE2: Each MAC address can have TLVs - * associated with it. sizeoftlvs tells - * the length of the tlvs array - */ - struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ - refcount_t usage; /* usage count */ -}; - -/* - * LANE2: Template tlv struct for accessing - * the tlvs in the lec_arp_table->tlvs array - */ -struct tlv { - u32 type; - u8 length; - u8 value[255]; -}; - -/* Status fields */ -#define ESI_UNKNOWN 0 /* - * Next packet sent to this mac address - * causes ARP-request to be sent - */ -#define ESI_ARP_PENDING 1 /* - * There is no ATM address associated with this - * 48-bit address. The LE-ARP protocol is in - * progress. - */ -#define ESI_VC_PENDING 2 /* - * There is a valid ATM address associated with - * this 48-bit address but there is no VC set - * up to that ATM address. The signaling - * protocol is in process. - */ -#define ESI_FLUSH_PENDING 4 /* - * The LEC has been notified of the FLUSH_START - * status and it is assumed that the flush - * protocol is in process. - */ -#define ESI_FORWARD_DIRECT 5 /* - * Either the Path Switching Delay (C22) has - * elapsed or the LEC has notified the Mapping - * that the flush protocol has completed. In - * either case, it is safe to forward packets - * to this address via the data direct VC. 
- */ - -/* Flag values */ -#define LEC_REMOTE_FLAG 0x0001 -#define LEC_PERMANENT_FLAG 0x0002 - -#endif /* _LEC_ARP_H_ */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c deleted file mode 100644 index ce8e9780373b..000000000000 --- a/net/atm/mpc.c +++ /dev/null @@ -1,1538 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/capability.h> -#include <linux/seq_file.h> - -/* We are an ethernet device */ -#include <linux/if_ether.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <net/sock.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/uaccess.h> -#include <asm/byteorder.h> -#include <net/checksum.h> /* for ip_fast_csum() */ -#include <net/arp.h> -#include <net/dst.h> -#include <linux/proc_fs.h> - -/* And atm device */ -#include <linux/atmdev.h> -#include <linux/atmlec.h> -#include <linux/atmmpc.h> -/* Modular too */ -#include <linux/module.h> - -#include "lec.h" -#include "mpc.h" -#include "resources.h" - -/* - * mpc.c: Implementation of MPOA client kernel part - */ - -#if 0 -#define dprintk(format, args...) \ - printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args) -#define dprintk_cont(format, args...) printk(KERN_CONT format, ##args) -#else -#define dprintk(format, args...) \ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args);\ - } while (0) -#define dprintk_cont(format, args...) \ - do { if (0) printk(KERN_CONT format, ##args); } while (0) -#endif - -#if 0 -#define ddprintk(format, args...) \ - printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args) -#define ddprintk_cont(format, args...) printk(KERN_CONT format, ##args) -#else -#define ddprintk(format, args...) 
\ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __func__, ##args);\ - } while (0) -#define ddprintk_cont(format, args...) \ - do { if (0) printk(KERN_CONT format, ##args); } while (0) -#endif - -/* mpc_daemon -> kernel */ -static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc); -static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc); -static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc); -static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc); -static void mps_death(struct k_message *msg, struct mpoa_client *mpc); -static void clean_up(struct k_message *msg, struct mpoa_client *mpc, - int action); -static void MPOA_cache_impos_rcvd(struct k_message *msg, - struct mpoa_client *mpc); -static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, - struct mpoa_client *mpc); -static void set_mps_mac_addr_rcvd(struct k_message *mesg, - struct mpoa_client *mpc); - -static const uint8_t *copy_macs(struct mpoa_client *mpc, - const uint8_t *router_mac, - const uint8_t *tlvs, uint8_t mps_macs, - uint8_t device_type); -static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry); - -static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc); -static void mpoad_close(struct atm_vcc *vcc); -static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb); - -static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb); -static netdev_tx_t mpc_send_packet(struct sk_buff *skb, - struct net_device *dev); -static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev); -static void mpc_timer_refresh(void); -static void mpc_cache_check(struct timer_list *unused); - -static struct llc_snap_hdr llc_snap_mpoa_ctrl = { - 0xaa, 0xaa, 0x03, - {0x00, 0x00, 0x5e}, - {0x00, 0x03} /* For MPOA control PDUs */ -}; -static struct llc_snap_hdr llc_snap_mpoa_data = { - 0xaa, 0xaa, 0x03, - {0x00, 0x00, 0x00}, - 
{0x08, 0x00} /* This is for IP PDUs only */ -}; -static struct llc_snap_hdr llc_snap_mpoa_data_tagged = { - 0xaa, 0xaa, 0x03, - {0x00, 0x00, 0x00}, - {0x88, 0x4c} /* This is for tagged data PDUs */ -}; - -static struct notifier_block mpoa_notifier = { - mpoa_event_listener, - NULL, - 0 -}; - -struct mpoa_client *mpcs = NULL; /* FIXME */ -static struct atm_mpoa_qos *qos_head = NULL; -static DEFINE_TIMER(mpc_timer, mpc_cache_check); - - -static struct mpoa_client *find_mpc_by_itfnum(int itf) -{ - struct mpoa_client *mpc; - - mpc = mpcs; /* our global linked list */ - while (mpc != NULL) { - if (mpc->dev_num == itf) - return mpc; - mpc = mpc->next; - } - - return NULL; /* not found */ -} - -static struct mpoa_client *find_mpc_by_vcc(struct atm_vcc *vcc) -{ - struct mpoa_client *mpc; - - mpc = mpcs; /* our global linked list */ - while (mpc != NULL) { - if (mpc->mpoad_vcc == vcc) - return mpc; - mpc = mpc->next; - } - - return NULL; /* not found */ -} - -static struct mpoa_client *find_mpc_by_lec(struct net_device *dev) -{ - struct mpoa_client *mpc; - - mpc = mpcs; /* our global linked list */ - while (mpc != NULL) { - if (mpc->dev == dev) - return mpc; - mpc = mpc->next; - } - - return NULL; /* not found */ -} - -/* - * Functions for managing QoS list - */ - -/* - * Overwrites the old entry or makes a new one. 
- */ -struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos) -{ - struct atm_mpoa_qos *entry; - - entry = atm_mpoa_search_qos(dst_ip); - if (entry != NULL) { - entry->qos = *qos; - return entry; - } - - entry = kmalloc_obj(struct atm_mpoa_qos); - if (entry == NULL) { - pr_info("mpoa: out of memory\n"); - return entry; - } - - entry->ipaddr = dst_ip; - entry->qos = *qos; - - entry->next = qos_head; - qos_head = entry; - - return entry; -} - -struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip) -{ - struct atm_mpoa_qos *qos; - - qos = qos_head; - while (qos) { - if (qos->ipaddr == dst_ip) - break; - qos = qos->next; - } - - return qos; -} - -/* - * Returns 0 for failure - */ -int atm_mpoa_delete_qos(struct atm_mpoa_qos *entry) -{ - struct atm_mpoa_qos *curr; - - if (entry == NULL) - return 0; - if (entry == qos_head) { - qos_head = qos_head->next; - kfree(entry); - return 1; - } - - curr = qos_head; - while (curr != NULL) { - if (curr->next == entry) { - curr->next = entry->next; - kfree(entry); - return 1; - } - curr = curr->next; - } - - return 0; -} - -/* this is buggered - we need locking for qos_head */ -void atm_mpoa_disp_qos(struct seq_file *m) -{ - struct atm_mpoa_qos *qos; - - qos = qos_head; - seq_printf(m, "QoS entries for shortcuts:\n"); - seq_printf(m, "IP address\n TX:max_pcr pcr min_pcr max_cdv max_sdu\n RX:max_pcr pcr min_pcr max_cdv max_sdu\n"); - - while (qos != NULL) { - seq_printf(m, "%pI4\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", - &qos->ipaddr, - qos->qos.txtp.max_pcr, - qos->qos.txtp.pcr, - qos->qos.txtp.min_pcr, - qos->qos.txtp.max_cdv, - qos->qos.txtp.max_sdu, - qos->qos.rxtp.max_pcr, - qos->qos.rxtp.pcr, - qos->qos.rxtp.min_pcr, - qos->qos.rxtp.max_cdv, - qos->qos.rxtp.max_sdu); - qos = qos->next; - } -} - -static struct net_device *find_lec_by_itfnum(int itf) -{ - struct net_device *dev; - char name[IFNAMSIZ]; - - sprintf(name, "lec%d", itf); - dev = dev_get_by_name(&init_net, name); - - return dev; -} 
- -static struct mpoa_client *alloc_mpc(void) -{ - struct mpoa_client *mpc; - - mpc = kzalloc_obj(struct mpoa_client); - if (mpc == NULL) - return NULL; - rwlock_init(&mpc->ingress_lock); - rwlock_init(&mpc->egress_lock); - mpc->next = mpcs; - atm_mpoa_init_cache(mpc); - - mpc->parameters.mpc_p1 = MPC_P1; - mpc->parameters.mpc_p2 = MPC_P2; - memset(mpc->parameters.mpc_p3, 0, sizeof(mpc->parameters.mpc_p3)); - mpc->parameters.mpc_p4 = MPC_P4; - mpc->parameters.mpc_p5 = MPC_P5; - mpc->parameters.mpc_p6 = MPC_P6; - - mpcs = mpc; - - return mpc; -} - -/* - * - * start_mpc() puts the MPC on line. All the packets destined - * to the lec underneath us are now being monitored and - * shortcuts will be established. - * - */ -static void start_mpc(struct mpoa_client *mpc, struct net_device *dev) -{ - - dprintk("(%s)\n", mpc->dev->name); - if (!dev->netdev_ops) - pr_info("(%s) not starting\n", dev->name); - else { - mpc->old_ops = dev->netdev_ops; - mpc->new_ops = *mpc->old_ops; - mpc->new_ops.ndo_start_xmit = mpc_send_packet; - dev->netdev_ops = &mpc->new_ops; - } -} - -static void stop_mpc(struct mpoa_client *mpc) -{ - struct net_device *dev = mpc->dev; - dprintk("(%s)", mpc->dev->name); - - /* Lets not nullify lec device's dev->hard_start_xmit */ - if (dev->netdev_ops != &mpc->new_ops) { - dprintk_cont(" mpc already stopped, not fatal\n"); - return; - } - dprintk_cont("\n"); - - dev->netdev_ops = mpc->old_ops; - mpc->old_ops = NULL; - - /* close_shortcuts(mpc); ??? FIXME */ -} - -static const char *mpoa_device_type_string(char type) __attribute__ ((unused)); - -static const char *mpoa_device_type_string(char type) -{ - switch (type) { - case NON_MPOA: - return "non-MPOA device"; - case MPS: - return "MPS"; - case MPC: - return "MPC"; - case MPS_AND_MPC: - return "both MPS and MPC"; - } - - return "unspecified (non-MPOA) device"; -} - -/* - * lec device calls this via its netdev_priv(dev)->lane2_ops - * ->associate_indicator() when it sees a TLV in LE_ARP packet. 
- * We fill in the pointer above when we see a LANE2 lec initializing - * See LANE2 spec 3.1.5 - * - * Quite a big and ugly function but when you look at it - * all it does is to try to locate and parse MPOA Device - * Type TLV. - * We give our lec a pointer to this function and when the - * lec sees a TLV it uses the pointer to call this function. - * - */ -static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, - const u8 *tlvs, u32 sizeoftlvs) -{ - uint32_t type; - uint8_t length, mpoa_device_type, number_of_mps_macs; - const uint8_t *end_of_tlvs; - struct mpoa_client *mpc; - - mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */ - dprintk("(%s) received TLV(s), ", dev->name); - dprintk("total length of all TLVs %d\n", sizeoftlvs); - mpc = find_mpc_by_lec(dev); /* Sampo-Fix: moved here from below */ - if (mpc == NULL) { - pr_info("(%s) no mpc\n", dev->name); - return; - } - end_of_tlvs = tlvs + sizeoftlvs; - while (end_of_tlvs - tlvs >= 5) { - type = ((tlvs[0] << 24) | (tlvs[1] << 16) | - (tlvs[2] << 8) | tlvs[3]); - length = tlvs[4]; - tlvs += 5; - dprintk(" type 0x%x length %02x\n", type, length); - if (tlvs + length > end_of_tlvs) { - pr_info("TLV value extends past its buffer, aborting parse\n"); - return; - } - - if (type == 0) { - pr_info("mpoa: (%s) TLV type was 0, returning\n", - dev->name); - return; - } - - if (type != TLV_MPOA_DEVICE_TYPE) { - tlvs += length; - continue; /* skip other TLVs */ - } - mpoa_device_type = *tlvs++; - number_of_mps_macs = *tlvs++; - dprintk("(%s) MPOA device type '%s', ", - dev->name, mpoa_device_type_string(mpoa_device_type)); - if (mpoa_device_type == MPS_AND_MPC && - length < (42 + number_of_mps_macs*ETH_ALEN)) { /* :) */ - pr_info("(%s) short MPOA Device Type TLV\n", - dev->name); - continue; - } - if ((mpoa_device_type == MPS || mpoa_device_type == MPC) && - length < 22 + number_of_mps_macs*ETH_ALEN) { - pr_info("(%s) short MPOA Device Type TLV\n", dev->name); - continue; - } - if 
(mpoa_device_type != MPS && - mpoa_device_type != MPS_AND_MPC) { - dprintk("ignoring non-MPS device "); - if (mpoa_device_type == MPC) - tlvs += 20; - continue; /* we are only interested in MPSs */ - } - if (number_of_mps_macs == 0 && - mpoa_device_type == MPS_AND_MPC) { - pr_info("(%s) MPS_AND_MPC has zero MACs\n", dev->name); - continue; /* someone should read the spec */ - } - dprintk_cont("this MPS has %d MAC addresses\n", - number_of_mps_macs); - - /* - * ok, now we can go and tell our daemon - * the control address of MPS - */ - send_set_mps_ctrl_addr(tlvs, mpc); - - tlvs = copy_macs(mpc, mac_addr, tlvs, - number_of_mps_macs, mpoa_device_type); - if (tlvs == NULL) - return; - } - if (end_of_tlvs - tlvs != 0) - pr_info("(%s) ignoring %zd bytes of trailing TLV garbage\n", - dev->name, end_of_tlvs - tlvs); -} - -/* - * Store at least advertizing router's MAC address - * plus the possible MAC address(es) to mpc->mps_macs. - * For a freshly allocated MPOA client mpc->mps_macs == 0. - */ -static const uint8_t *copy_macs(struct mpoa_client *mpc, - const uint8_t *router_mac, - const uint8_t *tlvs, uint8_t mps_macs, - uint8_t device_type) -{ - int num_macs; - num_macs = (mps_macs > 1) ? mps_macs : 1; - - if (mpc->number_of_mps_macs != num_macs) { /* need to reallocate? 
*/ - if (mpc->number_of_mps_macs != 0) - kfree(mpc->mps_macs); - mpc->number_of_mps_macs = 0; - mpc->mps_macs = kmalloc_array(ETH_ALEN, num_macs, GFP_KERNEL); - if (mpc->mps_macs == NULL) { - pr_info("(%s) out of mem\n", mpc->dev->name); - return NULL; - } - } - ether_addr_copy(mpc->mps_macs, router_mac); - tlvs += 20; if (device_type == MPS_AND_MPC) tlvs += 20; - if (mps_macs > 0) - memcpy(mpc->mps_macs, tlvs, mps_macs*ETH_ALEN); - tlvs += mps_macs*ETH_ALEN; - mpc->number_of_mps_macs = num_macs; - - return tlvs; -} - -static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) -{ - in_cache_entry *entry; - struct iphdr *iph; - char *buff; - __be32 ipaddr = 0; - - static struct { - struct llc_snap_hdr hdr; - __be32 tag; - } tagged_llc_snap_hdr = { - {0xaa, 0xaa, 0x03, {0x00, 0x00, 0x00}, {0x88, 0x4c}}, - 0 - }; - - buff = skb->data + mpc->dev->hard_header_len; - iph = (struct iphdr *)buff; - ipaddr = iph->daddr; - - ddprintk("(%s) ipaddr 0x%x\n", - mpc->dev->name, ipaddr); - - entry = mpc->in_ops->get(ipaddr, mpc); - if (entry == NULL) { - entry = mpc->in_ops->add_entry(ipaddr, mpc); - if (entry != NULL) - mpc->in_ops->put(entry); - return 1; - } - /* threshold not exceeded or VCC not ready */ - if (mpc->in_ops->cache_hit(entry, mpc) != OPEN) { - ddprintk("(%s) cache_hit: returns != OPEN\n", - mpc->dev->name); - mpc->in_ops->put(entry); - return 1; - } - - ddprintk("(%s) using shortcut\n", - mpc->dev->name); - /* MPOA spec A.1.4, MPOA client must decrement IP ttl at least by one */ - if (iph->ttl <= 1) { - ddprintk("(%s) IP ttl = %u, using LANE\n", - mpc->dev->name, iph->ttl); - mpc->in_ops->put(entry); - return 1; - } - iph->ttl--; - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - if (entry->ctrl_info.tag != 0) { - ddprintk("(%s) adding tag 0x%x\n", - mpc->dev->name, entry->ctrl_info.tag); - tagged_llc_snap_hdr.tag = entry->ctrl_info.tag; - skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ - skb_push(skb, 
sizeof(tagged_llc_snap_hdr)); - /* add LLC/SNAP header */ - skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr, - sizeof(tagged_llc_snap_hdr)); - } else { - skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ - skb_push(skb, sizeof(struct llc_snap_hdr)); - /* add LLC/SNAP header + tag */ - skb_copy_to_linear_data(skb, &llc_snap_mpoa_data, - sizeof(struct llc_snap_hdr)); - } - - atm_account_tx(entry->shortcut, skb); - entry->shortcut->send(entry->shortcut, skb); - entry->packets_fwded++; - mpc->in_ops->put(entry); - - return 0; -} - -/* - * Probably needs some error checks and locking, not sure... - */ -static netdev_tx_t mpc_send_packet(struct sk_buff *skb, - struct net_device *dev) -{ - struct mpoa_client *mpc; - struct ethhdr *eth; - int i = 0; - - mpc = find_mpc_by_lec(dev); /* this should NEVER fail */ - if (mpc == NULL) { - pr_info("(%s) no MPC found\n", dev->name); - goto non_ip; - } - - eth = (struct ethhdr *)skb->data; - if (eth->h_proto != htons(ETH_P_IP)) - goto non_ip; /* Multi-Protocol Over ATM :-) */ - - /* Weed out funny packets (e.g., AF_PACKET or raw). 
*/ - if (skb->len < ETH_HLEN + sizeof(struct iphdr)) - goto non_ip; - skb_set_network_header(skb, ETH_HLEN); - if (skb->len < ETH_HLEN + ip_hdr(skb)->ihl * 4 || ip_hdr(skb)->ihl < 5) - goto non_ip; - - while (i < mpc->number_of_mps_macs) { - if (ether_addr_equal(eth->h_dest, mpc->mps_macs + i * ETH_ALEN)) - if (send_via_shortcut(skb, mpc) == 0) /* try shortcut */ - return NETDEV_TX_OK; - i++; - } - -non_ip: - return __netdev_start_xmit(mpc->old_ops, skb, dev, false); -} - -static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) -{ - int bytes_left; - struct mpoa_client *mpc; - struct atmmpc_ioc ioc_data; - in_cache_entry *in_entry; - __be32 ipaddr; - - bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc)); - if (bytes_left != 0) { - pr_info("mpoa:Short read (missed %d bytes) from userland\n", - bytes_left); - return -EFAULT; - } - ipaddr = ioc_data.ipaddr; - if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF) - return -EINVAL; - - mpc = find_mpc_by_itfnum(ioc_data.dev_num); - if (mpc == NULL) - return -EINVAL; - - if (ioc_data.type == MPC_SOCKET_INGRESS) { - in_entry = mpc->in_ops->get(ipaddr, mpc); - if (in_entry == NULL || - in_entry->entry_state < INGRESS_RESOLVED) { - pr_info("(%s) did not find RESOLVED entry from ingress cache\n", - mpc->dev->name); - if (in_entry != NULL) - mpc->in_ops->put(in_entry); - return -EINVAL; - } - pr_info("(%s) attaching ingress SVC, entry = %pI4\n", - mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); - in_entry->shortcut = vcc; - mpc->in_ops->put(in_entry); - } else { - pr_info("(%s) attaching egress SVC\n", mpc->dev->name); - } - - vcc->proto_data = mpc->dev; - vcc->push = mpc_push; - - return 0; -} - -/* - * - */ -static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) -{ - struct mpoa_client *mpc; - in_cache_entry *in_entry; - eg_cache_entry *eg_entry; - - mpc = find_mpc_by_lec(dev); - if (mpc == NULL) { - pr_info("(%s) close for unknown MPC\n", dev->name); - return; - } - - 
dprintk("(%s)\n", dev->name); - in_entry = mpc->in_ops->get_by_vcc(vcc, mpc); - if (in_entry) { - dprintk("(%s) ingress SVC closed ip = %pI4\n", - mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); - in_entry->shortcut = NULL; - mpc->in_ops->put(in_entry); - } - eg_entry = mpc->eg_ops->get_by_vcc(vcc, mpc); - if (eg_entry) { - dprintk("(%s) egress SVC closed\n", mpc->dev->name); - eg_entry->shortcut = NULL; - mpc->eg_ops->put(eg_entry); - } - - if (in_entry == NULL && eg_entry == NULL) - dprintk("(%s) unused vcc closed\n", dev->name); -} - -static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct sk_buff *new_skb; - eg_cache_entry *eg; - struct mpoa_client *mpc; - __be32 tag; - char *tmp; - - ddprintk("(%s)\n", dev->name); - if (skb == NULL) { - dprintk("(%s) null skb, closing VCC\n", dev->name); - mpc_vcc_close(vcc, dev); - return; - } - - skb->dev = dev; - if (memcmp(skb->data, &llc_snap_mpoa_ctrl, - sizeof(struct llc_snap_hdr)) == 0) { - struct sock *sk = sk_atm(vcc); - - dprintk("(%s) control packet arrived\n", dev->name); - /* Pass control packets to daemon */ - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - return; - } - - /* data coming over the shortcut */ - atm_return(vcc, skb->truesize); - - mpc = find_mpc_by_lec(dev); - if (mpc == NULL) { - pr_info("(%s) unknown MPC\n", dev->name); - return; - } - - if (memcmp(skb->data, &llc_snap_mpoa_data_tagged, - sizeof(struct llc_snap_hdr)) == 0) { /* MPOA tagged data */ - ddprintk("(%s) tagged data packet arrived\n", dev->name); - - } else if (memcmp(skb->data, &llc_snap_mpoa_data, - sizeof(struct llc_snap_hdr)) == 0) { /* MPOA data */ - pr_info("(%s) Unsupported non-tagged data packet arrived. 
Purging\n", - dev->name); - dev_kfree_skb_any(skb); - return; - } else { - pr_info("(%s) garbage arrived, purging\n", dev->name); - dev_kfree_skb_any(skb); - return; - } - - tmp = skb->data + sizeof(struct llc_snap_hdr); - tag = *(__be32 *)tmp; - - eg = mpc->eg_ops->get_by_tag(tag, mpc); - if (eg == NULL) { - pr_info("mpoa: (%s) Didn't find egress cache entry, tag = %u\n", - dev->name, tag); - purge_egress_shortcut(vcc, NULL); - dev_kfree_skb_any(skb); - return; - } - - /* - * See if ingress MPC is using shortcut we opened as a return channel. - * This means we have a bi-directional vcc opened by us. - */ - if (eg->shortcut == NULL) { - eg->shortcut = vcc; - pr_info("(%s) egress SVC in use\n", dev->name); - } - - skb_pull(skb, sizeof(struct llc_snap_hdr) + sizeof(tag)); - /* get rid of LLC/SNAP header */ - new_skb = skb_realloc_headroom(skb, eg->ctrl_info.DH_length); - /* LLC/SNAP is shorter than MAC header :( */ - dev_kfree_skb_any(skb); - if (new_skb == NULL) { - mpc->eg_ops->put(eg); - return; - } - skb_push(new_skb, eg->ctrl_info.DH_length); /* add MAC header */ - skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header, - eg->ctrl_info.DH_length); - new_skb->protocol = eth_type_trans(new_skb, dev); - skb_reset_network_header(new_skb); - - eg->latest_ip_addr = ip_hdr(new_skb)->saddr; - eg->packets_rcvd++; - mpc->eg_ops->put(eg); - - memset(ATM_SKB(new_skb), 0, sizeof(struct atm_skb_data)); - netif_rx(new_skb); -} - -static const struct atmdev_ops mpc_ops = { /* only send is required */ - .close = mpoad_close, - .send = msg_from_mpoad -}; - -static struct atm_dev mpc_dev = { - .ops = &mpc_ops, - .type = "mpc", - .number = 42, - .lock = __SPIN_LOCK_UNLOCKED(mpc_dev.lock) - /* members not explicitly initialised will be 0 */ -}; - -static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) -{ - struct mpoa_client *mpc; - struct lec_priv *priv; - int err; - - if (mpcs == NULL) { - mpc_timer_refresh(); - - /* This lets us now how our LECs are doing */ - err = 
register_netdevice_notifier(&mpoa_notifier); - if (err < 0) { - timer_delete(&mpc_timer); - return err; - } - } - - mpc = find_mpc_by_itfnum(arg); - if (mpc == NULL) { - dprintk("allocating new mpc for itf %d\n", arg); - mpc = alloc_mpc(); - if (mpc == NULL) - return -ENOMEM; - mpc->dev_num = arg; - mpc->dev = find_lec_by_itfnum(arg); - /* NULL if there was no lec */ - } - if (mpc->mpoad_vcc) { - pr_info("mpoad is already present for itf %d\n", arg); - return -EADDRINUSE; - } - - if (mpc->dev) { /* check if the lec is LANE2 capable */ - priv = netdev_priv(mpc->dev); - if (priv->lane_version < 2) { - dev_put(mpc->dev); - mpc->dev = NULL; - } else - priv->lane2_ops->associate_indicator = lane2_assoc_ind; - } - - mpc->mpoad_vcc = vcc; - vcc->dev = &mpc_dev; - vcc_insert_socket(sk_atm(vcc)); - set_bit(ATM_VF_META, &vcc->flags); - set_bit(ATM_VF_READY, &vcc->flags); - - if (mpc->dev) { - char empty[ATM_ESA_LEN]; - memset(empty, 0, ATM_ESA_LEN); - - start_mpc(mpc, mpc->dev); - /* set address if mpcd e.g. gets killed and restarted. 
- * If we do not do it now we have to wait for the next LE_ARP - */ - if (memcmp(mpc->mps_ctrl_addr, empty, ATM_ESA_LEN) != 0) - send_set_mps_ctrl_addr(mpc->mps_ctrl_addr, mpc); - } - - __module_get(THIS_MODULE); - return arg; -} - -static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc) -{ - struct k_message mesg; - - memcpy(mpc->mps_ctrl_addr, addr, ATM_ESA_LEN); - - mesg.type = SET_MPS_CTRL_ADDR; - memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN); - msg_to_mpoad(&mesg, mpc); -} - -static void mpoad_close(struct atm_vcc *vcc) -{ - struct mpoa_client *mpc; - struct sk_buff *skb; - - mpc = find_mpc_by_vcc(vcc); - if (mpc == NULL) { - pr_info("did not find MPC\n"); - return; - } - if (!mpc->mpoad_vcc) { - pr_info("close for non-present mpoad\n"); - return; - } - - mpc->mpoad_vcc = NULL; - if (mpc->dev) { - struct lec_priv *priv = netdev_priv(mpc->dev); - priv->lane2_ops->associate_indicator = NULL; - stop_mpc(mpc); - dev_put(mpc->dev); - } - - mpc->in_ops->destroy_cache(mpc); - mpc->eg_ops->destroy_cache(mpc); - - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue))) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - } - - pr_info("(%s) going down\n", - (mpc->dev) ? mpc->dev->name : "<unknown>"); - module_put(THIS_MODULE); -} - -/* - * - */ -static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb) -{ - - struct mpoa_client *mpc = find_mpc_by_vcc(vcc); - struct k_message *mesg = (struct k_message *)skb->data; - WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); - - if (mpc == NULL) { - pr_info("no mpc found\n"); - return 0; - } - dprintk("(%s)", mpc->dev ? 
mpc->dev->name : "<unknown>"); - switch (mesg->type) { - case MPOA_RES_REPLY_RCVD: - dprintk_cont("mpoa_res_reply_rcvd\n"); - MPOA_res_reply_rcvd(mesg, mpc); - break; - case MPOA_TRIGGER_RCVD: - dprintk_cont("mpoa_trigger_rcvd\n"); - MPOA_trigger_rcvd(mesg, mpc); - break; - case INGRESS_PURGE_RCVD: - dprintk_cont("nhrp_purge_rcvd\n"); - ingress_purge_rcvd(mesg, mpc); - break; - case EGRESS_PURGE_RCVD: - dprintk_cont("egress_purge_reply_rcvd\n"); - egress_purge_rcvd(mesg, mpc); - break; - case MPS_DEATH: - dprintk_cont("mps_death\n"); - mps_death(mesg, mpc); - break; - case CACHE_IMPOS_RCVD: - dprintk_cont("cache_impos_rcvd\n"); - MPOA_cache_impos_rcvd(mesg, mpc); - break; - case SET_MPC_CTRL_ADDR: - dprintk_cont("set_mpc_ctrl_addr\n"); - set_mpc_ctrl_addr_rcvd(mesg, mpc); - break; - case SET_MPS_MAC_ADDR: - dprintk_cont("set_mps_mac_addr\n"); - set_mps_mac_addr_rcvd(mesg, mpc); - break; - case CLEAN_UP_AND_EXIT: - dprintk_cont("clean_up_and_exit\n"); - clean_up(mesg, mpc, DIE); - break; - case RELOAD: - dprintk_cont("reload\n"); - clean_up(mesg, mpc, RELOAD); - break; - case SET_MPC_PARAMS: - dprintk_cont("set_mpc_params\n"); - mpc->parameters = mesg->content.params; - break; - default: - dprintk_cont("unknown message %d\n", mesg->type); - break; - } - kfree_skb(skb); - - return 0; -} - -/* Remember that this function may not do things that sleep */ -int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) -{ - struct sk_buff *skb; - struct sock *sk; - - if (mpc == NULL || !mpc->mpoad_vcc) { - pr_info("mesg %d to a non-existent mpoad\n", mesg->type); - return -ENXIO; - } - - skb = alloc_skb(sizeof(struct k_message), GFP_ATOMIC); - if (skb == NULL) - return -ENOMEM; - skb_put(skb, sizeof(struct k_message)); - skb_copy_to_linear_data(skb, mesg, sizeof(*mesg)); - atm_force_charge(mpc->mpoad_vcc, skb->truesize); - - sk = sk_atm(mpc->mpoad_vcc); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - - return 0; -} - -static int 
mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct mpoa_client *mpc; - struct lec_priv *priv; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - if (strncmp(dev->name, "lec", 3)) - return NOTIFY_DONE; /* we are only interested in lec:s */ - - switch (event) { - case NETDEV_REGISTER: /* a new lec device was allocated */ - priv = netdev_priv(dev); - if (priv->lane_version < 2) - break; - priv->lane2_ops->associate_indicator = lane2_assoc_ind; - mpc = find_mpc_by_itfnum(priv->itfnum); - if (mpc == NULL) { - dprintk("allocating new mpc for %s\n", dev->name); - mpc = alloc_mpc(); - if (mpc == NULL) { - pr_info("no new mpc"); - break; - } - } - mpc->dev_num = priv->itfnum; - mpc->dev = dev; - dev_hold(dev); - dprintk("(%s) was initialized\n", dev->name); - break; - case NETDEV_UNREGISTER: - /* the lec device was deallocated */ - mpc = find_mpc_by_lec(dev); - if (mpc == NULL) - break; - dprintk("device (%s) was deallocated\n", dev->name); - stop_mpc(mpc); - dev_put(mpc->dev); - mpc->dev = NULL; - break; - case NETDEV_UP: - /* the dev was ifconfig'ed up */ - mpc = find_mpc_by_lec(dev); - if (mpc == NULL) - break; - if (mpc->mpoad_vcc != NULL) - start_mpc(mpc, dev); - break; - case NETDEV_DOWN: - /* the dev was ifconfig'ed down */ - /* this means that the flow of packets from the - * upper layer stops - */ - mpc = find_mpc_by_lec(dev); - if (mpc == NULL) - break; - if (mpc->mpoad_vcc != NULL) - stop_mpc(mpc); - break; - case NETDEV_REBOOT: - case NETDEV_CHANGE: - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - case NETDEV_GOING_DOWN: - break; - default: - break; - } - - return NOTIFY_DONE; -} - -/* - * Functions which are called after a message is received from mpcd. - * Msg is reused on purpose. 
- */ - - -static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) -{ - __be32 dst_ip = msg->content.in_info.in_dst_ip; - in_cache_entry *entry; - - entry = mpc->in_ops->get(dst_ip, mpc); - if (entry == NULL) { - entry = mpc->in_ops->add_entry(dst_ip, mpc); - entry->entry_state = INGRESS_RESOLVING; - msg->type = SND_MPOA_RES_RQST; - msg->content.in_info = entry->ctrl_info; - msg_to_mpoad(msg, mpc); - entry->reply_wait = ktime_get_seconds(); - mpc->in_ops->put(entry); - return; - } - - if (entry->entry_state == INGRESS_INVALID) { - entry->entry_state = INGRESS_RESOLVING; - msg->type = SND_MPOA_RES_RQST; - msg->content.in_info = entry->ctrl_info; - msg_to_mpoad(msg, mpc); - entry->reply_wait = ktime_get_seconds(); - mpc->in_ops->put(entry); - return; - } - - pr_info("(%s) entry already in resolving state\n", - (mpc->dev) ? mpc->dev->name : "<unknown>"); - mpc->in_ops->put(entry); -} - -/* - * Things get complicated because we have to check if there's an egress - * shortcut with suitable traffic parameters we could use. - */ -static void check_qos_and_open_shortcut(struct k_message *msg, - struct mpoa_client *client, - in_cache_entry *entry) -{ - __be32 dst_ip = msg->content.in_info.in_dst_ip; - struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); - eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client); - - if (eg_entry && eg_entry->shortcut) { - if (eg_entry->shortcut->qos.txtp.traffic_class & - msg->qos.txtp.traffic_class & - (qos ? 
qos->qos.txtp.traffic_class : ATM_UBR | ATM_CBR)) { - if (eg_entry->shortcut->qos.txtp.traffic_class == ATM_UBR) - entry->shortcut = eg_entry->shortcut; - else if (eg_entry->shortcut->qos.txtp.max_pcr > 0) - entry->shortcut = eg_entry->shortcut; - } - if (entry->shortcut) { - dprintk("(%s) using egress SVC to reach %pI4\n", - client->dev->name, &dst_ip); - client->eg_ops->put(eg_entry); - return; - } - } - if (eg_entry != NULL) - client->eg_ops->put(eg_entry); - - /* No luck in the egress cache we must open an ingress SVC */ - msg->type = OPEN_INGRESS_SVC; - if (qos && - (qos->qos.txtp.traffic_class == msg->qos.txtp.traffic_class)) { - msg->qos = qos->qos; - pr_info("(%s) trying to get a CBR shortcut\n", - client->dev->name); - } else - memset(&msg->qos, 0, sizeof(struct atm_qos)); - msg_to_mpoad(msg, client); -} - -static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) -{ - __be32 dst_ip = msg->content.in_info.in_dst_ip; - in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); - - dprintk("(%s) ip %pI4\n", - mpc->dev->name, &dst_ip); - ddprintk("(%s) entry = %p", - mpc->dev->name, entry); - if (entry == NULL) { - pr_info("(%s) ARGH, received res. reply for an entry that doesn't exist.\n", - mpc->dev->name); - return; - } - ddprintk_cont(" entry_state = %d ", entry->entry_state); - - if (entry->entry_state == INGRESS_RESOLVED) { - pr_info("(%s) RESOLVED entry!\n", mpc->dev->name); - mpc->in_ops->put(entry); - return; - } - - entry->ctrl_info = msg->content.in_info; - entry->time = ktime_get_seconds(); - /* Used in refreshing func from now on */ - entry->reply_wait = ktime_get_seconds(); - entry->refresh_time = 0; - ddprintk_cont("entry->shortcut = %p\n", entry->shortcut); - - if (entry->entry_state == INGRESS_RESOLVING && - entry->shortcut != NULL) { - entry->entry_state = INGRESS_RESOLVED; - mpc->in_ops->put(entry); - return; /* Shortcut already open... 
*/ - } - - if (entry->shortcut != NULL) { - pr_info("(%s) entry->shortcut != NULL, impossible!\n", - mpc->dev->name); - mpc->in_ops->put(entry); - return; - } - - check_qos_and_open_shortcut(msg, mpc, entry); - entry->entry_state = INGRESS_RESOLVED; - mpc->in_ops->put(entry); - - return; - -} - -static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) -{ - __be32 dst_ip = msg->content.in_info.in_dst_ip; - __be32 mask = msg->ip_mask; - in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); - - if (entry == NULL) { - pr_info("(%s) purge for a non-existing entry, ip = %pI4\n", - mpc->dev->name, &dst_ip); - return; - } - - do { - dprintk("(%s) removing an ingress entry, ip = %pI4\n", - mpc->dev->name, &dst_ip); - write_lock_bh(&mpc->ingress_lock); - mpc->in_ops->remove_entry(entry, mpc); - write_unlock_bh(&mpc->ingress_lock); - mpc->in_ops->put(entry); - entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); - } while (entry != NULL); -} - -static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) -{ - __be32 cache_id = msg->content.eg_info.cache_id; - eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc); - - if (entry == NULL) { - dprintk("(%s) purge for a non-existing entry\n", - mpc->dev->name); - return; - } - - write_lock_irq(&mpc->egress_lock); - mpc->eg_ops->remove_entry(entry, mpc); - write_unlock_irq(&mpc->egress_lock); - - mpc->eg_ops->put(entry); -} - -static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) -{ - struct sock *sk; - struct k_message *purge_msg; - struct sk_buff *skb; - - dprintk("entering\n"); - if (vcc == NULL) { - pr_info("vcc == NULL\n"); - return; - } - - skb = alloc_skb(sizeof(struct k_message), GFP_ATOMIC); - if (skb == NULL) { - pr_info("out of memory\n"); - return; - } - - skb_put(skb, sizeof(struct k_message)); - memset(skb->data, 0, sizeof(struct k_message)); - purge_msg = (struct k_message *)skb->data; - purge_msg->type = DATA_PLANE_PURGE; 
- if (entry != NULL) - purge_msg->content.eg_info = entry->ctrl_info; - - atm_force_charge(vcc, skb->truesize); - - sk = sk_atm(vcc); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - dprintk("exiting\n"); -} - -/* - * Our MPS died. Tell our daemon to send NHRP data plane purge to each - * of the egress shortcuts we have. - */ -static void mps_death(struct k_message *msg, struct mpoa_client *mpc) -{ - eg_cache_entry *entry; - - dprintk("(%s)\n", mpc->dev->name); - - if (memcmp(msg->MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN)) { - pr_info("(%s) wrong MPS\n", mpc->dev->name); - return; - } - - /* FIXME: This knows too much of the cache structure */ - read_lock_irq(&mpc->egress_lock); - entry = mpc->eg_cache; - while (entry != NULL) { - purge_egress_shortcut(entry->shortcut, entry); - entry = entry->next; - } - read_unlock_irq(&mpc->egress_lock); - - mpc->in_ops->destroy_cache(mpc); - mpc->eg_ops->destroy_cache(mpc); -} - -static void MPOA_cache_impos_rcvd(struct k_message *msg, - struct mpoa_client *mpc) -{ - uint16_t holding_time; - eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(msg->content.eg_info.cache_id, mpc); - - holding_time = msg->content.eg_info.holding_time; - dprintk("(%s) entry = %p, holding_time = %u\n", - mpc->dev->name, entry, holding_time); - if (entry == NULL && !holding_time) - return; - if (entry == NULL && holding_time) { - entry = mpc->eg_ops->add_entry(msg, mpc); - mpc->eg_ops->put(entry); - return; - } - if (holding_time) { - mpc->eg_ops->update(entry, holding_time); - return; - } - - write_lock_irq(&mpc->egress_lock); - mpc->eg_ops->remove_entry(entry, mpc); - write_unlock_irq(&mpc->egress_lock); - - mpc->eg_ops->put(entry); -} - -static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, - struct mpoa_client *mpc) -{ - struct lec_priv *priv; - int i, retval ; - - uint8_t tlv[4 + 1 + 1 + 1 + ATM_ESA_LEN]; - - tlv[0] = 00; tlv[1] = 0xa0; tlv[2] = 0x3e; tlv[3] = 0x2a; /* type */ - tlv[4] = 1 + 1 + ATM_ESA_LEN; /* 
length */ - tlv[5] = 0x02; /* MPOA client */ - tlv[6] = 0x00; /* number of MPS MAC addresses */ - - memcpy(&tlv[7], mesg->MPS_ctrl, ATM_ESA_LEN); /* MPC ctrl ATM addr */ - memcpy(mpc->our_ctrl_addr, mesg->MPS_ctrl, ATM_ESA_LEN); - - dprintk("(%s) setting MPC ctrl ATM address to", - mpc->dev ? mpc->dev->name : "<unknown>"); - for (i = 7; i < sizeof(tlv); i++) - dprintk_cont(" %02x", tlv[i]); - dprintk_cont("\n"); - - if (mpc->dev) { - priv = netdev_priv(mpc->dev); - retval = priv->lane2_ops->associate_req(mpc->dev, - mpc->dev->dev_addr, - tlv, sizeof(tlv)); - if (retval == 0) - pr_info("(%s) MPOA device type TLV association failed\n", - mpc->dev->name); - retval = priv->lane2_ops->resolve(mpc->dev, NULL, 1, NULL, NULL); - if (retval < 0) - pr_info("(%s) targetless LE_ARP request failed\n", - mpc->dev->name); - } -} - -static void set_mps_mac_addr_rcvd(struct k_message *msg, - struct mpoa_client *client) -{ - - if (client->number_of_mps_macs) - kfree(client->mps_macs); - client->number_of_mps_macs = 0; - client->mps_macs = kmemdup(msg->MPS_ctrl, ETH_ALEN, GFP_KERNEL); - if (client->mps_macs == NULL) { - pr_info("out of memory\n"); - return; - } - client->number_of_mps_macs = 1; -} - -/* - * purge egress cache and tell daemon to 'action' (DIE, RELOAD) - */ -static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) -{ - - eg_cache_entry *entry; - msg->type = SND_EGRESS_PURGE; - - - /* FIXME: This knows too much of the cache structure */ - read_lock_irq(&mpc->egress_lock); - entry = mpc->eg_cache; - while (entry != NULL) { - msg->content.eg_info = entry->ctrl_info; - dprintk("cache_id %u\n", entry->ctrl_info.cache_id); - msg_to_mpoad(msg, mpc); - entry = entry->next; - } - read_unlock_irq(&mpc->egress_lock); - - msg->type = action; - msg_to_mpoad(msg, mpc); -} - -static unsigned long checking_time; - -static void mpc_timer_refresh(void) -{ - mpc_timer.expires = jiffies + (MPC_P2 * HZ); - checking_time = mpc_timer.expires; - add_timer(&mpc_timer); 
-} - -static void mpc_cache_check(struct timer_list *unused) -{ - struct mpoa_client *mpc = mpcs; - static unsigned long previous_resolving_check_time; - static unsigned long previous_refresh_time; - - while (mpc != NULL) { - mpc->in_ops->clear_count(mpc); - mpc->eg_ops->clear_expired(mpc); - if (checking_time - previous_resolving_check_time > - mpc->parameters.mpc_p4 * HZ) { - mpc->in_ops->check_resolving(mpc); - previous_resolving_check_time = checking_time; - } - if (checking_time - previous_refresh_time > - mpc->parameters.mpc_p5 * HZ) { - mpc->in_ops->refresh(mpc); - previous_refresh_time = checking_time; - } - mpc = mpc->next; - } - mpc_timer_refresh(); -} - -static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - int err = 0; - struct atm_vcc *vcc = ATM_SD(sock); - - if (cmd != ATMMPC_CTRL && cmd != ATMMPC_DATA) - return -ENOIOCTLCMD; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case ATMMPC_CTRL: - err = atm_mpoa_mpoad_attach(vcc, (int)arg); - if (err >= 0) - sock->state = SS_CONNECTED; - break; - case ATMMPC_DATA: - err = atm_mpoa_vcc_attach(vcc, (void __user *)arg); - break; - default: - break; - } - return err; -} - -static struct atm_ioctl atm_ioctl_ops = { - .owner = THIS_MODULE, - .ioctl = atm_mpoa_ioctl, -}; - -static __init int atm_mpoa_init(void) -{ - register_atm_ioctl(&atm_ioctl_ops); - - if (mpc_proc_init() != 0) - pr_info("failed to initialize /proc/mpoa\n"); - - pr_info("mpc.c: initialized\n"); - - return 0; -} - -static void __exit atm_mpoa_cleanup(void) -{ - struct mpoa_client *mpc, *tmp; - struct atm_mpoa_qos *qos, *nextqos; - struct lec_priv *priv; - - mpc_proc_clean(); - - timer_delete_sync(&mpc_timer); - unregister_netdevice_notifier(&mpoa_notifier); - deregister_atm_ioctl(&atm_ioctl_ops); - - mpc = mpcs; - mpcs = NULL; - while (mpc != NULL) { - tmp = mpc->next; - if (mpc->dev != NULL) { - stop_mpc(mpc); - priv = netdev_priv(mpc->dev); - if (priv->lane2_ops != NULL) - 
priv->lane2_ops->associate_indicator = NULL; - } - ddprintk("about to clear caches\n"); - mpc->in_ops->destroy_cache(mpc); - mpc->eg_ops->destroy_cache(mpc); - ddprintk("caches cleared\n"); - kfree(mpc->mps_macs); - memset(mpc, 0, sizeof(struct mpoa_client)); - ddprintk("about to kfree %p\n", mpc); - kfree(mpc); - ddprintk("next mpc is at %p\n", tmp); - mpc = tmp; - } - - qos = qos_head; - qos_head = NULL; - while (qos != NULL) { - nextqos = qos->next; - dprintk("freeing qos entry %p\n", qos); - kfree(qos); - qos = nextqos; - } -} - -module_init(atm_mpoa_init); -module_exit(atm_mpoa_cleanup); - -MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver"); -MODULE_LICENSE("GPL"); diff --git a/net/atm/mpc.h b/net/atm/mpc.h deleted file mode 100644 index 454abd07651a..000000000000 --- a/net/atm/mpc.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MPC_H_ -#define _MPC_H_ - -#include <linux/types.h> -#include <linux/atm.h> -#include <linux/atmmpc.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include "mpoa_caches.h" - -/* kernel -> mpc-daemon */ -int msg_to_mpoad(struct k_message *msg, struct mpoa_client *mpc); - -struct mpoa_client { - struct mpoa_client *next; - struct net_device *dev; /* lec in question */ - int dev_num; /* e.g. 
2 for lec2 */ - - struct atm_vcc *mpoad_vcc; /* control channel to mpoad */ - uint8_t mps_ctrl_addr[ATM_ESA_LEN]; /* MPS control ATM address */ - uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ - - rwlock_t ingress_lock; - const struct in_cache_ops *in_ops; /* ingress cache operations */ - in_cache_entry *in_cache; /* the ingress cache of this MPC */ - - rwlock_t egress_lock; - const struct eg_cache_ops *eg_ops; /* egress cache operations */ - eg_cache_entry *eg_cache; /* the egress cache of this MPC */ - - uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ - int number_of_mps_macs; /* number of the above MAC addresses */ - struct mpc_parameters parameters; /* parameters for this client */ - - const struct net_device_ops *old_ops; - struct net_device_ops new_ops; -}; - - -struct atm_mpoa_qos { - struct atm_mpoa_qos *next; - __be32 ipaddr; - struct atm_qos qos; -}; - - -/* MPOA QoS operations */ -struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos); -struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip); -int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); - -/* Display QoS entries. This is for the procfs */ -struct seq_file; -void atm_mpoa_disp_qos(struct seq_file *m); - -#ifdef CONFIG_PROC_FS -int mpc_proc_init(void); -void mpc_proc_clean(void); -#else -#define mpc_proc_init() (0) -#define mpc_proc_clean() do { } while(0) -#endif - -#endif /* _MPC_H_ */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c deleted file mode 100644 index c8d4e6f2e831..000000000000 --- a/net/atm/mpoa_caches.c +++ /dev/null @@ -1,565 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/atmmpc.h> -#include <linux/slab.h> -#include <linux/time.h> - -#include "mpoa_caches.h" -#include "mpc.h" - -/* - * mpoa_caches.c: Implementation of ingress and egress cache - * handling functions - */ - -#if 0 -#define dprintk(format, args...) 
\ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ -#else -#define dprintk(format, args...) \ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ - } while (0) -#endif - -#if 0 -#define ddprintk(format, args...) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ -#else -#define ddprintk(format, args...) \ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ - } while (0) -#endif - -static in_cache_entry *in_cache_get(__be32 dst_ip, - struct mpoa_client *client) -{ - in_cache_entry *entry; - - read_lock_bh(&client->ingress_lock); - entry = client->in_cache; - while (entry != NULL) { - if (entry->ctrl_info.in_dst_ip == dst_ip) { - refcount_inc(&entry->use); - read_unlock_bh(&client->ingress_lock); - return entry; - } - entry = entry->next; - } - read_unlock_bh(&client->ingress_lock); - - return NULL; -} - -static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip, - struct mpoa_client *client, - __be32 mask) -{ - in_cache_entry *entry; - - read_lock_bh(&client->ingress_lock); - entry = client->in_cache; - while (entry != NULL) { - if ((entry->ctrl_info.in_dst_ip & mask) == (dst_ip & mask)) { - refcount_inc(&entry->use); - read_unlock_bh(&client->ingress_lock); - return entry; - } - entry = entry->next; - } - read_unlock_bh(&client->ingress_lock); - - return NULL; - -} - -static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc, - struct mpoa_client *client) -{ - in_cache_entry *entry; - - read_lock_bh(&client->ingress_lock); - entry = client->in_cache; - while (entry != NULL) { - if (entry->shortcut == vcc) { - refcount_inc(&entry->use); - read_unlock_bh(&client->ingress_lock); - return entry; - } - entry = entry->next; - } - read_unlock_bh(&client->ingress_lock); - - return NULL; -} - -static in_cache_entry *in_cache_add_entry(__be32 dst_ip, - struct mpoa_client *client) -{ - in_cache_entry *entry = kzalloc_obj(in_cache_entry); - - if (entry == NULL) { - pr_info("mpoa: 
mpoa_caches.c: new_in_cache_entry: out of memory\n"); - return NULL; - } - - dprintk("adding an ingress entry, ip = %pI4\n", &dst_ip); - - refcount_set(&entry->use, 1); - dprintk("new_in_cache_entry: about to lock\n"); - write_lock_bh(&client->ingress_lock); - entry->next = client->in_cache; - entry->prev = NULL; - if (client->in_cache != NULL) - client->in_cache->prev = entry; - client->in_cache = entry; - - memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN); - entry->ctrl_info.in_dst_ip = dst_ip; - entry->time = ktime_get_seconds(); - entry->retry_time = client->parameters.mpc_p4; - entry->count = 1; - entry->entry_state = INGRESS_INVALID; - entry->ctrl_info.holding_time = HOLDING_TIME_DEFAULT; - refcount_inc(&entry->use); - - write_unlock_bh(&client->ingress_lock); - dprintk("new_in_cache_entry: unlocked\n"); - - return entry; -} - -static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) -{ - struct atm_mpoa_qos *qos; - struct k_message msg; - - entry->count++; - if (entry->entry_state == INGRESS_RESOLVED && entry->shortcut != NULL) - return OPEN; - - if (entry->entry_state == INGRESS_REFRESHING) { - if (entry->count > mpc->parameters.mpc_p1) { - msg.type = SND_MPOA_RES_RQST; - msg.content.in_info = entry->ctrl_info; - memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN); - qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) - msg.qos = qos->qos; - msg_to_mpoad(&msg, mpc); - entry->reply_wait = ktime_get_seconds(); - entry->entry_state = INGRESS_RESOLVING; - } - if (entry->shortcut != NULL) - return OPEN; - return CLOSED; - } - - if (entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL) - return OPEN; - - if (entry->count > mpc->parameters.mpc_p1 && - entry->entry_state == INGRESS_INVALID) { - dprintk("(%s) threshold exceeded for ip %pI4, sending MPOA res req\n", - mpc->dev->name, &entry->ctrl_info.in_dst_ip); - entry->entry_state = INGRESS_RESOLVING; - msg.type = SND_MPOA_RES_RQST; - 
memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN); - msg.content.in_info = entry->ctrl_info; - qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) - msg.qos = qos->qos; - msg_to_mpoad(&msg, mpc); - entry->reply_wait = ktime_get_seconds(); - } - - return CLOSED; -} - -static void in_cache_put(in_cache_entry *entry) -{ - if (refcount_dec_and_test(&entry->use)) { - kfree_sensitive(entry); - } -} - -/* - * This should be called with write lock on - */ -static void in_cache_remove_entry(in_cache_entry *entry, - struct mpoa_client *client) -{ - struct atm_vcc *vcc; - struct k_message msg; - - vcc = entry->shortcut; - dprintk("removing an ingress entry, ip = %pI4\n", - &entry->ctrl_info.in_dst_ip); - - if (entry->prev != NULL) - entry->prev->next = entry->next; - else - client->in_cache = entry->next; - if (entry->next != NULL) - entry->next->prev = entry->prev; - client->in_ops->put(entry); - if (client->in_cache == NULL && client->eg_cache == NULL) { - msg.type = STOP_KEEP_ALIVE_SM; - msg_to_mpoad(&msg, client); - } - - /* Check if the egress side still uses this VCC */ - if (vcc != NULL) { - eg_cache_entry *eg_entry = client->eg_ops->get_by_vcc(vcc, - client); - if (eg_entry != NULL) { - client->eg_ops->put(eg_entry); - return; - } - vcc_release_async(vcc, -EPIPE); - } -} - -/* Call this every MPC-p2 seconds... Not exactly correct solution, - but an easy one... 
*/ -static void clear_count_and_expired(struct mpoa_client *client) -{ - in_cache_entry *entry, *next_entry; - time64_t now; - - now = ktime_get_seconds(); - - write_lock_bh(&client->ingress_lock); - entry = client->in_cache; - while (entry != NULL) { - entry->count = 0; - next_entry = entry->next; - if ((now - entry->time) > entry->ctrl_info.holding_time) { - dprintk("holding time expired, ip = %pI4\n", - &entry->ctrl_info.in_dst_ip); - client->in_ops->remove_entry(entry, client); - } - entry = next_entry; - } - write_unlock_bh(&client->ingress_lock); -} - -/* Call this every MPC-p4 seconds. */ -static void check_resolving_entries(struct mpoa_client *client) -{ - - struct atm_mpoa_qos *qos; - in_cache_entry *entry; - time64_t now; - struct k_message msg; - - now = ktime_get_seconds(); - - read_lock_bh(&client->ingress_lock); - entry = client->in_cache; - while (entry != NULL) { - if (entry->entry_state == INGRESS_RESOLVING) { - - if ((now - entry->hold_down) - < client->parameters.mpc_p6) { - entry = entry->next; /* Entry in hold down */ - continue; - } - if ((now - entry->reply_wait) > entry->retry_time) { - entry->retry_time = MPC_C1 * (entry->retry_time); - /* - * Retry time maximum exceeded, - * put entry in hold down. - */ - if (entry->retry_time > client->parameters.mpc_p5) { - entry->hold_down = ktime_get_seconds(); - entry->retry_time = client->parameters.mpc_p4; - entry = entry->next; - continue; - } - /* Ask daemon to send a resolution request. */ - memset(&entry->hold_down, 0, sizeof(time64_t)); - msg.type = SND_MPOA_RES_RTRY; - memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN); - msg.content.in_info = entry->ctrl_info; - qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip); - if (qos != NULL) - msg.qos = qos->qos; - msg_to_mpoad(&msg, client); - entry->reply_wait = ktime_get_seconds(); - } - } - entry = entry->next; - } - read_unlock_bh(&client->ingress_lock); -} - -/* Call this every MPC-p5 seconds. 
*/ -static void refresh_entries(struct mpoa_client *client) -{ - time64_t now; - struct in_cache_entry *entry = client->in_cache; - - ddprintk("refresh_entries\n"); - now = ktime_get_seconds(); - - read_lock_bh(&client->ingress_lock); - while (entry != NULL) { - if (entry->entry_state == INGRESS_RESOLVED) { - if (!(entry->refresh_time)) - entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3; - if ((now - entry->reply_wait) > - entry->refresh_time) { - dprintk("refreshing an entry.\n"); - entry->entry_state = INGRESS_REFRESHING; - - } - } - entry = entry->next; - } - read_unlock_bh(&client->ingress_lock); -} - -static void in_destroy_cache(struct mpoa_client *mpc) -{ - write_lock_irq(&mpc->ingress_lock); - while (mpc->in_cache != NULL) - mpc->in_ops->remove_entry(mpc->in_cache, mpc); - write_unlock_irq(&mpc->ingress_lock); -} - -static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, - struct mpoa_client *mpc) -{ - eg_cache_entry *entry; - - read_lock_irq(&mpc->egress_lock); - entry = mpc->eg_cache; - while (entry != NULL) { - if (entry->ctrl_info.cache_id == cache_id) { - refcount_inc(&entry->use); - read_unlock_irq(&mpc->egress_lock); - return entry; - } - entry = entry->next; - } - read_unlock_irq(&mpc->egress_lock); - - return NULL; -} - -/* This can be called from any context since it saves CPU flags */ -static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc) -{ - unsigned long flags; - eg_cache_entry *entry; - - read_lock_irqsave(&mpc->egress_lock, flags); - entry = mpc->eg_cache; - while (entry != NULL) { - if (entry->ctrl_info.tag == tag) { - refcount_inc(&entry->use); - read_unlock_irqrestore(&mpc->egress_lock, flags); - return entry; - } - entry = entry->next; - } - read_unlock_irqrestore(&mpc->egress_lock, flags); - - return NULL; -} - -/* This can be called from any context since it saves CPU flags */ -static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, - struct mpoa_client *mpc) -{ - unsigned long 
flags; - eg_cache_entry *entry; - - read_lock_irqsave(&mpc->egress_lock, flags); - entry = mpc->eg_cache; - while (entry != NULL) { - if (entry->shortcut == vcc) { - refcount_inc(&entry->use); - read_unlock_irqrestore(&mpc->egress_lock, flags); - return entry; - } - entry = entry->next; - } - read_unlock_irqrestore(&mpc->egress_lock, flags); - - return NULL; -} - -static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, - struct mpoa_client *mpc) -{ - eg_cache_entry *entry; - - read_lock_irq(&mpc->egress_lock); - entry = mpc->eg_cache; - while (entry != NULL) { - if (entry->latest_ip_addr == ipaddr) { - refcount_inc(&entry->use); - read_unlock_irq(&mpc->egress_lock); - return entry; - } - entry = entry->next; - } - read_unlock_irq(&mpc->egress_lock); - - return NULL; -} - -static void eg_cache_put(eg_cache_entry *entry) -{ - if (refcount_dec_and_test(&entry->use)) { - kfree_sensitive(entry); - } -} - -/* - * This should be called with write lock on - */ -static void eg_cache_remove_entry(eg_cache_entry *entry, - struct mpoa_client *client) -{ - struct atm_vcc *vcc; - struct k_message msg; - - vcc = entry->shortcut; - dprintk("removing an egress entry.\n"); - if (entry->prev != NULL) - entry->prev->next = entry->next; - else - client->eg_cache = entry->next; - if (entry->next != NULL) - entry->next->prev = entry->prev; - client->eg_ops->put(entry); - if (client->in_cache == NULL && client->eg_cache == NULL) { - msg.type = STOP_KEEP_ALIVE_SM; - msg_to_mpoad(&msg, client); - } - - /* Check if the ingress side still uses this VCC */ - if (vcc != NULL) { - in_cache_entry *in_entry = client->in_ops->get_by_vcc(vcc, client); - if (in_entry != NULL) { - client->in_ops->put(in_entry); - return; - } - vcc_release_async(vcc, -EPIPE); - } -} - -static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, - struct mpoa_client *client) -{ - eg_cache_entry *entry = kzalloc_obj(eg_cache_entry); - - if (entry == NULL) { - pr_info("out of memory\n"); - return NULL; - } - - 
dprintk("adding an egress entry, ip = %pI4, this should be our IP\n", - &msg->content.eg_info.eg_dst_ip); - - refcount_set(&entry->use, 1); - dprintk("new_eg_cache_entry: about to lock\n"); - write_lock_irq(&client->egress_lock); - entry->next = client->eg_cache; - entry->prev = NULL; - if (client->eg_cache != NULL) - client->eg_cache->prev = entry; - client->eg_cache = entry; - - memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN); - entry->ctrl_info = msg->content.eg_info; - entry->time = ktime_get_seconds(); - entry->entry_state = EGRESS_RESOLVED; - dprintk("new_eg_cache_entry cache_id %u\n", - ntohl(entry->ctrl_info.cache_id)); - dprintk("mps_ip = %pI4\n", &entry->ctrl_info.mps_ip); - refcount_inc(&entry->use); - - write_unlock_irq(&client->egress_lock); - dprintk("new_eg_cache_entry: unlocked\n"); - - return entry; -} - -static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time) -{ - entry->time = ktime_get_seconds(); - entry->entry_state = EGRESS_RESOLVED; - entry->ctrl_info.holding_time = holding_time; -} - -static void clear_expired(struct mpoa_client *client) -{ - eg_cache_entry *entry, *next_entry; - time64_t now; - struct k_message msg; - - now = ktime_get_seconds(); - - write_lock_irq(&client->egress_lock); - entry = client->eg_cache; - while (entry != NULL) { - next_entry = entry->next; - if ((now - entry->time) > entry->ctrl_info.holding_time) { - msg.type = SND_EGRESS_PURGE; - msg.content.eg_info = entry->ctrl_info; - dprintk("egress_cache: holding time expired, cache_id = %u.\n", - ntohl(entry->ctrl_info.cache_id)); - msg_to_mpoad(&msg, client); - client->eg_ops->remove_entry(entry, client); - } - entry = next_entry; - } - write_unlock_irq(&client->egress_lock); -} - -static void eg_destroy_cache(struct mpoa_client *mpc) -{ - write_lock_irq(&mpc->egress_lock); - while (mpc->eg_cache != NULL) - mpc->eg_ops->remove_entry(mpc->eg_cache, mpc); - write_unlock_irq(&mpc->egress_lock); -} - - -static const struct 
in_cache_ops ingress_ops = { - .add_entry = in_cache_add_entry, - .get = in_cache_get, - .get_with_mask = in_cache_get_with_mask, - .get_by_vcc = in_cache_get_by_vcc, - .put = in_cache_put, - .remove_entry = in_cache_remove_entry, - .cache_hit = cache_hit, - .clear_count = clear_count_and_expired, - .check_resolving = check_resolving_entries, - .refresh = refresh_entries, - .destroy_cache = in_destroy_cache -}; - -static const struct eg_cache_ops egress_ops = { - .add_entry = eg_cache_add_entry, - .get_by_cache_id = eg_cache_get_by_cache_id, - .get_by_tag = eg_cache_get_by_tag, - .get_by_vcc = eg_cache_get_by_vcc, - .get_by_src_ip = eg_cache_get_by_src_ip, - .put = eg_cache_put, - .remove_entry = eg_cache_remove_entry, - .update = update_eg_cache_entry, - .clear_expired = clear_expired, - .destroy_cache = eg_destroy_cache -}; - -void atm_mpoa_init_cache(struct mpoa_client *mpc) -{ - mpc->in_ops = &ingress_ops; - mpc->eg_ops = &egress_ops; -} diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h deleted file mode 100644 index 464c4c7f8d1f..000000000000 --- a/net/atm/mpoa_caches.h +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef MPOA_CACHES_H -#define MPOA_CACHES_H - -#include <linux/time64.h> -#include <linux/netdevice.h> -#include <linux/types.h> -#include <linux/atm.h> -#include <linux/atmdev.h> -#include <linux/atmmpc.h> -#include <linux/refcount.h> - -struct mpoa_client; - -void atm_mpoa_init_cache(struct mpoa_client *mpc); - -typedef struct in_cache_entry { - struct in_cache_entry *next; - struct in_cache_entry *prev; - time64_t time; - time64_t reply_wait; - time64_t hold_down; - uint32_t packets_fwded; - uint16_t entry_state; - uint32_t retry_time; - uint32_t refresh_time; - uint32_t count; - struct atm_vcc *shortcut; - uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN]; - struct in_ctrl_info ctrl_info; - refcount_t use; -} in_cache_entry; - -struct in_cache_ops{ - in_cache_entry *(*add_entry)(__be32 dst_ip, - struct mpoa_client 
*client); - in_cache_entry *(*get)(__be32 dst_ip, struct mpoa_client *client); - in_cache_entry *(*get_with_mask)(__be32 dst_ip, - struct mpoa_client *client, - __be32 mask); - in_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, - struct mpoa_client *client); - void (*put)(in_cache_entry *entry); - void (*remove_entry)(in_cache_entry *delEntry, - struct mpoa_client *client ); - int (*cache_hit)(in_cache_entry *entry, - struct mpoa_client *client); - void (*clear_count)(struct mpoa_client *client); - void (*check_resolving)(struct mpoa_client *client); - void (*refresh)(struct mpoa_client *client); - void (*destroy_cache)(struct mpoa_client *mpc); -}; - -typedef struct eg_cache_entry{ - struct eg_cache_entry *next; - struct eg_cache_entry *prev; - time64_t time; - uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN]; - struct atm_vcc *shortcut; - uint32_t packets_rcvd; - uint16_t entry_state; - __be32 latest_ip_addr; /* The src IP address of the last packet */ - struct eg_ctrl_info ctrl_info; - refcount_t use; -} eg_cache_entry; - -struct eg_cache_ops{ - eg_cache_entry *(*add_entry)(struct k_message *msg, struct mpoa_client *client); - eg_cache_entry *(*get_by_cache_id)(__be32 cache_id, struct mpoa_client *client); - eg_cache_entry *(*get_by_tag)(__be32 cache_id, struct mpoa_client *client); - eg_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, struct mpoa_client *client); - eg_cache_entry *(*get_by_src_ip)(__be32 ipaddr, struct mpoa_client *client); - void (*put)(eg_cache_entry *entry); - void (*remove_entry)(eg_cache_entry *entry, struct mpoa_client *client); - void (*update)(eg_cache_entry *entry, uint16_t holding_time); - void (*clear_expired)(struct mpoa_client *client); - void (*destroy_cache)(struct mpoa_client *mpc); -}; - - -/* Ingress cache entry states */ - -#define INGRESS_REFRESHING 3 -#define INGRESS_RESOLVED 2 -#define INGRESS_RESOLVING 1 -#define INGRESS_INVALID 0 - -/* VCC states */ - -#define OPEN 1 -#define CLOSED 0 - -/* Egress cache entry states */ - -#define 
EGRESS_RESOLVED 2 -#define EGRESS_PURGE 1 -#define EGRESS_INVALID 0 - -#endif diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c deleted file mode 100644 index aaf64b953915..000000000000 --- a/net/atm/mpoa_proc.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ - -#ifdef CONFIG_PROC_FS -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/ktime.h> -#include <linux/seq_file.h> -#include <linux/uaccess.h> -#include <linux/atmmpc.h> -#include <linux/atm.h> -#include <linux/gfp.h> -#include "mpc.h" -#include "mpoa_caches.h" - -/* - * mpoa_proc.c: Implementation MPOA client's proc - * file system statistics - */ - -#if 1 -#define dprintk(format, args...) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ -#else -#define dprintk(format, args...) \ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ - } while (0) -#endif - -#if 0 -#define ddprintk(format, args...) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args) /* debug */ -#else -#define ddprintk(format, args...) \ - do { if (0) \ - printk(KERN_DEBUG "mpoa:%s: " format, __FILE__, ##args);\ - } while (0) -#endif - -#define STAT_FILE_NAME "mpc" /* Our statistic file's name */ - -extern struct mpoa_client *mpcs; -extern struct proc_dir_entry *atm_proc_root; /* from proc.c. 
*/ - -static int proc_mpc_open(struct inode *inode, struct file *file); -static ssize_t proc_mpc_write(struct file *file, const char __user *buff, - size_t nbytes, loff_t *ppos); - -static int parse_qos(const char *buff); - -static const struct proc_ops mpc_proc_ops = { - .proc_open = proc_mpc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_write = proc_mpc_write, - .proc_release = seq_release, -}; - -/* - * Returns the state of an ingress cache entry as a string - */ -static const char *ingress_state_string(int state) -{ - switch (state) { - case INGRESS_RESOLVING: - return "resolving "; - case INGRESS_RESOLVED: - return "resolved "; - case INGRESS_INVALID: - return "invalid "; - case INGRESS_REFRESHING: - return "refreshing "; - } - - return ""; -} - -/* - * Returns the state of an egress cache entry as a string - */ -static const char *egress_state_string(int state) -{ - switch (state) { - case EGRESS_RESOLVED: - return "resolved "; - case EGRESS_PURGE: - return "purge "; - case EGRESS_INVALID: - return "invalid "; - } - - return ""; -} - -/* - * FIXME: mpcs (and per-mpc lists) have no locking whatsoever. - */ - -static void *mpc_start(struct seq_file *m, loff_t *pos) -{ - loff_t l = *pos; - struct mpoa_client *mpc; - - if (!l--) - return SEQ_START_TOKEN; - for (mpc = mpcs; mpc; mpc = mpc->next) - if (!l--) - return mpc; - return NULL; -} - -static void *mpc_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct mpoa_client *p = v; - (*pos)++; - return v == SEQ_START_TOKEN ? mpcs : p->next; -} - -static void mpc_stop(struct seq_file *m, void *v) -{ -} - -/* - * READING function - called when the /proc/atm/mpoa file is read from. 
- */ -static int mpc_show(struct seq_file *m, void *v) -{ - struct mpoa_client *mpc = v; - int i; - in_cache_entry *in_entry; - eg_cache_entry *eg_entry; - time64_t now; - unsigned char ip_string[16]; - - if (v == SEQ_START_TOKEN) { - atm_mpoa_disp_qos(m); - return 0; - } - - seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num); - seq_printf(m, "Ingress Entries:\nIP address State Holding time Packets fwded VPI VCI\n"); - now = ktime_get_seconds(); - - for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) { - unsigned long seconds_delta = now - in_entry->time; - - sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip); - seq_printf(m, "%-16s%s%-14lu%-12u", - ip_string, - ingress_state_string(in_entry->entry_state), - in_entry->ctrl_info.holding_time - - seconds_delta, - in_entry->packets_fwded); - if (in_entry->shortcut) - seq_printf(m, " %-3d %-3d", - in_entry->shortcut->vpi, - in_entry->shortcut->vci); - seq_printf(m, "\n"); - } - - seq_printf(m, "\n"); - seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n"); - for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) { - unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr; - unsigned long seconds_delta = now - eg_entry->time; - - for (i = 0; i < ATM_ESA_LEN; i++) - seq_printf(m, "%02x", p[i]); - seq_printf(m, "\n%-16lu%s%-14lu%-15u", - (unsigned long)ntohl(eg_entry->ctrl_info.cache_id), - egress_state_string(eg_entry->entry_state), - (eg_entry->ctrl_info.holding_time - seconds_delta), - eg_entry->packets_rcvd); - - /* latest IP address */ - sprintf(ip_string, "%pI4", &eg_entry->latest_ip_addr); - seq_printf(m, "%-16s", ip_string); - - if (eg_entry->shortcut) - seq_printf(m, " %-3d %-3d", - eg_entry->shortcut->vpi, - eg_entry->shortcut->vci); - seq_printf(m, "\n"); - } - seq_printf(m, "\n"); - return 0; -} - -static const struct seq_operations mpc_op = { - .start = mpc_start, - .next = mpc_next, - .stop = 
mpc_stop, - .show = mpc_show -}; - -static int proc_mpc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &mpc_op); -} - -static ssize_t proc_mpc_write(struct file *file, const char __user *buff, - size_t nbytes, loff_t *ppos) -{ - char *page, *p; - unsigned int len; - - if (nbytes == 0) - return 0; - - if (nbytes >= PAGE_SIZE) - nbytes = PAGE_SIZE-1; - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - for (p = page, len = 0; len < nbytes; p++) { - if (get_user(*p, buff++)) { - free_page((unsigned long)page); - return -EFAULT; - } - len += 1; - if (*p == '\0' || *p == '\n') - break; - } - - *p = '\0'; - - if (!parse_qos(page)) - printk("mpoa: proc_mpc_write: could not parse '%s'\n", page); - - free_page((unsigned long)page); - - return len; -} - -static int parse_qos(const char *buff) -{ - /* possible lines look like this - * add 130.230.54.142 tx=max_pcr,max_sdu rx=max_pcr,max_sdu - */ - unsigned char ip[4]; - int tx_pcr, tx_sdu, rx_pcr, rx_sdu; - __be32 ipaddr; - struct atm_qos qos; - - memset(&qos, 0, sizeof(struct atm_qos)); - - if (sscanf(buff, "del %hhu.%hhu.%hhu.%hhu", - ip, ip+1, ip+2, ip+3) == 4) { - ipaddr = *(__be32 *)ip; - return atm_mpoa_delete_qos(atm_mpoa_search_qos(ipaddr)); - } - - if (sscanf(buff, "add %hhu.%hhu.%hhu.%hhu tx=%d,%d rx=tx", - ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu) == 6) { - rx_pcr = tx_pcr; - rx_sdu = tx_sdu; - } else if (sscanf(buff, "add %hhu.%hhu.%hhu.%hhu tx=%d,%d rx=%d,%d", - ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu, &rx_pcr, &rx_sdu) != 8) - return 0; - - ipaddr = *(__be32 *)ip; - qos.txtp.traffic_class = ATM_CBR; - qos.txtp.max_pcr = tx_pcr; - qos.txtp.max_sdu = tx_sdu; - qos.rxtp.traffic_class = ATM_CBR; - qos.rxtp.max_pcr = rx_pcr; - qos.rxtp.max_sdu = rx_sdu; - qos.aal = ATM_AAL5; - dprintk("parse_qos(): setting qos parameters to tx=%d,%d rx=%d,%d\n", - qos.txtp.max_pcr, qos.txtp.max_sdu, - qos.rxtp.max_pcr, qos.rxtp.max_sdu); - - atm_mpoa_add_qos(ipaddr, &qos); - return 
1; -} - -/* - * INITIALIZATION function - called when module is initialized/loaded. - */ -int mpc_proc_init(void) -{ - struct proc_dir_entry *p; - - p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_proc_ops); - if (!p) { - pr_err("Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME); - return -ENOMEM; - } - return 0; -} - -/* - * DELETING function - called when module is removed. - */ -void mpc_proc_clean(void) -{ - remove_proc_entry(STAT_FILE_NAME, atm_proc_root); -} - -#endif /* CONFIG_PROC_FS */ diff --git a/net/atm/proc.c b/net/atm/proc.c index 9bf736290e48..b650da764a23 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -21,11 +21,9 @@ #include <linux/atm.h> #include <linux/atmdev.h> #include <linux/netdevice.h> -#include <linux/atmclip.h> #include <linux/init.h> /* for __init */ #include <linux/slab.h> #include <net/net_namespace.h> -#include <net/atmclip.h> #include <linux/uaccess.h> #include <linux/param.h> /* for HZ */ #include <linux/atomic.h> @@ -155,15 +153,6 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) class_name[vcc->qos.rxtp.traffic_class], vcc->qos.txtp.min_pcr, class_name[vcc->qos.txtp.traffic_class]); - if (test_bit(ATM_VF_IS_CLIP, &vcc->flags)) { - struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - struct net_device *dev; - - dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : NULL; - seq_printf(seq, "CLIP, Itf:%s, Encap:", - dev ? dev->name : "none?"); - seq_printf(seq, "%s", clip_vcc->encap ? 
"LLC/SNAP" : "None"); - } seq_putc(seq, '\n'); } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 358fbe5e4d1d..b991d937205a 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -179,6 +179,7 @@ as_indicate_complete: break; default: pr_alert("bad message type %d\n", (int)msg->type); + dev_kfree_skb(skb); /* Paired with find_get_vcc(msg->vcc) above */ sock_put(sk); return -EINVAL; diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig deleted file mode 100644 index 310169ce1488..000000000000 --- a/net/ax25/Kconfig +++ /dev/null @@ -1,108 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Amateur Radio protocols and AX.25 device configuration -# - -menuconfig HAMRADIO - depends on NET - bool "Amateur Radio support" - help - If you want to connect your Linux box to an amateur radio, answer Y - here. You want to read <https://www.tapr.org/> - and more specifically about AX.25 on Linux - <https://linux-ax25.in-berlin.de>. - - Note that the answer to this question won't directly affect the - kernel: saying N will just cause the configurator to skip all - the questions about amateur radio. - -comment "Packet Radio protocols" - depends on HAMRADIO - -config AX25 - tristate "Amateur Radio AX.25 Level 2 protocol" - depends on HAMRADIO - help - This is the protocol used for computer communication over amateur - radio. It is either used by itself for point-to-point links, or to - carry other protocols such as tcp/ip. To use it, you need a device - that connects your Linux box to your amateur radio. You can either - use a low speed TNC (a Terminal Node Controller acts as a kind of - modem connecting your computer's serial port to your radio's - microphone input and speaker output) supporting the KISS protocol or - one of the various SCC cards that are supported by the generic Z8530 - or the DMA SCC driver. Another option are the Baycom modem serial - and parallel port hacks or the sound card modem (supported by their - own drivers). 
If you say Y here, you also have to say Y to one of - those drivers. - - Information about where to get supporting software for Linux amateur - radio as well as information about how to configure an AX.25 port is - contained in the AX25-HOWTO, available from - <https://www.tldp.org/docs.html#howto>. You might also want to - check out the file <file:Documentation/networking/ax25.rst> in the - kernel source. More information about digital amateur radio in - general is on the WWW at - <https://www.tapr.org/>. - - To compile this driver as a module, choose M here: the - module will be called ax25. - -config AX25_DAMA_SLAVE - bool "AX.25 DAMA Slave support" - default y - depends on AX25 - help - DAMA is a mechanism to prevent collisions when doing AX.25 - networking. A DAMA server (called "master") accepts incoming traffic - from clients (called "slaves") and redistributes it to other slaves. - If you say Y here, your Linux box will act as a DAMA slave; this is - transparent in that you don't have to do any special DAMA - configuration. Linux cannot yet act as a DAMA server. This option - only compiles DAMA slave support into the kernel. It still needs to - be enabled at runtime. For more about DAMA see - <https://linux-ax25.in-berlin.de>. If unsure, say Y. - -config NETROM - tristate "Amateur Radio NET/ROM protocol" - depends on AX25 - help - NET/ROM is a network layer protocol on top of AX.25 useful for - routing. - - A comprehensive listing of all the software for Linux amateur radio - users as well as information about how to configure an AX.25 port is - contained in the Linux Ham Wiki, available from - <https://linux-ax25.in-berlin.de>. You also might want to check out - the file <file:Documentation/networking/ax25.rst>. More information - about digital amateur radio in general is on the WWW at - <https://www.tapr.org/>. - - To compile this driver as a module, choose M here: the - module will be called netrom. 
- -config ROSE - tristate "Amateur Radio X.25 PLP (Rose)" - depends on AX25 - help - The Packet Layer Protocol (PLP) is a way to route packets over X.25 - connections in general and amateur radio AX.25 connections in - particular, essentially an alternative to NET/ROM. - - A comprehensive listing of all the software for Linux amateur radio - users as well as information about how to configure an AX.25 port is - contained in the Linux Ham Wiki, available from - <https://linux-ax25.in-berlin.de>. You also might want to check out - the file <file:Documentation/networking/ax25.rst>. More information - about digital amateur radio in general is on the WWW at - <https://www.tapr.org/>. - - To compile this driver as a module, choose M here: the - module will be called rose. - -menu "AX.25 network device drivers" - depends on HAMRADIO && AX25 - -source "drivers/net/hamradio/Kconfig" - -endmenu diff --git a/net/ax25/Makefile b/net/ax25/Makefile deleted file mode 100644 index 2e53affc8568..000000000000 --- a/net/ax25/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Linux AX.25 layer. 
-# - -obj-$(CONFIG_AX25) += ax25.o - -ax25-y := ax25_addr.o ax25_dev.o ax25_iface.o ax25_in.o ax25_ip.o ax25_out.o \ - ax25_route.o ax25_std_in.o ax25_std_subr.o ax25_std_timer.o \ - ax25_subr.o ax25_timer.o ax25_uid.o af_ax25.o -ax25-$(CONFIG_AX25_DAMA_SLAVE) += ax25_ds_in.o ax25_ds_subr.o ax25_ds_timer.o -ax25-$(CONFIG_SYSCTL) += sysctl_net_ax25.o diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c deleted file mode 100644 index 9d236e64f5f5..000000000000 --- a/net/ax25/af_ax25.c +++ /dev/null @@ -1,2089 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk) - * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) - * Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl) - * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) - */ -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/sched/signal.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/sysctl.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <net/net_namespace.h> -#include <net/tcp_states.h> -#include <net/ip.h> -#include 
<net/arp.h> - - - -HLIST_HEAD(ax25_list); -DEFINE_SPINLOCK(ax25_list_lock); - -static const struct proto_ops ax25_proto_ops; - -static void ax25_free_sock(struct sock *sk) -{ - ax25_cb_put(sk_to_ax25(sk)); -} - -/* - * Socket removal during an interrupt is now safe. - */ -static void ax25_cb_del(ax25_cb *ax25) -{ - spin_lock_bh(&ax25_list_lock); - if (!hlist_unhashed(&ax25->ax25_node)) { - hlist_del_init(&ax25->ax25_node); - ax25_cb_put(ax25); - } - spin_unlock_bh(&ax25_list_lock); -} - -/* - * Kill all bound sockets on a dropped device. - */ -static void ax25_kill_by_device(struct net_device *dev) -{ - ax25_dev *ax25_dev; - ax25_cb *s; - struct sock *sk; - - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) - return; - ax25_dev->device_up = false; - - spin_lock_bh(&ax25_list_lock); -again: - ax25_for_each(s, &ax25_list) { - if (s->ax25_dev == ax25_dev) { - sk = s->sk; - if (!sk) { - spin_unlock_bh(&ax25_list_lock); - ax25_disconnect(s, ENETUNREACH); - s->ax25_dev = NULL; - ax25_cb_del(s); - spin_lock_bh(&ax25_list_lock); - goto again; - } - sock_hold(sk); - spin_unlock_bh(&ax25_list_lock); - lock_sock(sk); - ax25_disconnect(s, ENETUNREACH); - s->ax25_dev = NULL; - if (sk->sk_socket) { - netdev_put(ax25_dev->dev, - &s->dev_tracker); - ax25_dev_put(ax25_dev); - } - ax25_cb_del(s); - release_sock(sk); - spin_lock_bh(&ax25_list_lock); - sock_put(sk); - /* The entry could have been deleted from the - * list meanwhile and thus the next pointer is - * no longer valid. Play it safe and restart - * the scan. Forward progress is ensured - * because we set s->ax25_dev to NULL and we - * are never passed a NULL 'dev' argument. - */ - goto again; - } - } - spin_unlock_bh(&ax25_list_lock); -} - -/* - * Handle device status changes. 
- */ -static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - /* Reject non AX.25 devices */ - if (dev->type != ARPHRD_AX25) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - ax25_dev_device_up(dev); - break; - case NETDEV_DOWN: - ax25_kill_by_device(dev); - ax25_rt_device_down(dev); - ax25_dev_device_down(dev); - break; - default: - break; - } - - return NOTIFY_DONE; -} - -/* - * Add a socket to the bound sockets list. - */ -void ax25_cb_add(ax25_cb *ax25) -{ - spin_lock_bh(&ax25_list_lock); - ax25_cb_hold(ax25); - hlist_add_head(&ax25->ax25_node, &ax25_list); - spin_unlock_bh(&ax25_list_lock); -} - -/* - * Find a socket that wants to accept the SABM we have just - * received. - */ -struct sock *ax25_find_listener(ax25_address *addr, int digi, - struct net_device *dev, int type) -{ - ax25_cb *s; - - spin_lock(&ax25_list_lock); - ax25_for_each(s, &ax25_list) { - if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) - continue; - if (s->sk && !ax25cmp(&s->source_addr, addr) && - s->sk->sk_type == type && s->sk->sk_state == TCP_LISTEN) { - /* If device is null we match any device */ - if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) { - sock_hold(s->sk); - spin_unlock(&ax25_list_lock); - return s->sk; - } - } - } - spin_unlock(&ax25_list_lock); - - return NULL; -} - -/* - * Find an AX.25 socket given both ends. - */ -struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, - int type) -{ - struct sock *sk = NULL; - ax25_cb *s; - - spin_lock(&ax25_list_lock); - ax25_for_each(s, &ax25_list) { - if (s->sk && !ax25cmp(&s->source_addr, my_addr) && - !ax25cmp(&s->dest_addr, dest_addr) && - s->sk->sk_type == type) { - sk = s->sk; - sock_hold(sk); - break; - } - } - - spin_unlock(&ax25_list_lock); - - return sk; -} - -/* - * Find an AX.25 control block given both ends. 
It will only pick up - * floating AX.25 control blocks or non Raw socket bound control blocks. - */ -ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr, - ax25_digi *digi, struct net_device *dev) -{ - ax25_cb *s; - - spin_lock_bh(&ax25_list_lock); - ax25_for_each(s, &ax25_list) { - if (s->sk && s->sk->sk_type != SOCK_SEQPACKET) - continue; - if (s->ax25_dev == NULL) - continue; - if (ax25cmp(&s->source_addr, src_addr) == 0 && ax25cmp(&s->dest_addr, dest_addr) == 0 && s->ax25_dev->dev == dev) { - if (digi != NULL && digi->ndigi != 0) { - if (s->digipeat == NULL) - continue; - if (ax25digicmp(s->digipeat, digi) != 0) - continue; - } else { - if (s->digipeat != NULL && s->digipeat->ndigi != 0) - continue; - } - ax25_cb_hold(s); - spin_unlock_bh(&ax25_list_lock); - - return s; - } - } - spin_unlock_bh(&ax25_list_lock); - - return NULL; -} - -EXPORT_SYMBOL(ax25_find_cb); - -void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) -{ - ax25_cb *s; - struct sk_buff *copy; - - spin_lock(&ax25_list_lock); - ax25_for_each(s, &ax25_list) { - if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && - s->sk->sk_type == SOCK_RAW && - s->sk->sk_protocol == proto && - s->ax25_dev->dev == skb->dev && - atomic_read(&s->sk->sk_rmem_alloc) <= s->sk->sk_rcvbuf) { - if ((copy = skb_clone(skb, GFP_ATOMIC)) == NULL) - continue; - if (sock_queue_rcv_skb(s->sk, copy) != 0) - kfree_skb(copy); - } - } - spin_unlock(&ax25_list_lock); -} - -/* - * Deferred destroy. - */ -void ax25_destroy_socket(ax25_cb *); - -/* - * Handler for deferred kills. - */ -static void ax25_destroy_timer(struct timer_list *t) -{ - ax25_cb *ax25 = timer_container_of(ax25, t, dtimer); - struct sock *sk; - - sk=ax25->sk; - - bh_lock_sock(sk); - sock_hold(sk); - ax25_destroy_socket(ax25); - bh_unlock_sock(sk); - sock_put(sk); -} - -/* - * This is called from user mode and the timers. 
Thus it protects itself - * against interrupt users but doesn't worry about being called during - * work. Once it is removed from the queue no interrupt or bottom half - * will touch it and we are (fairly 8-) ) safe. - */ -void ax25_destroy_socket(ax25_cb *ax25) -{ - struct sk_buff *skb; - - ax25_cb_del(ax25); - - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); - - ax25_clear_queues(ax25); /* Flush the queues */ - - if (ax25->sk != NULL) { - while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) { - if (skb->sk != ax25->sk) { - /* A pending connection */ - ax25_cb *sax25 = sk_to_ax25(skb->sk); - - /* Queue the unaccepted socket for death */ - sock_orphan(skb->sk); - - /* 9A4GL: hack to release unaccepted sockets */ - skb->sk->sk_state = TCP_LISTEN; - - ax25_start_heartbeat(sax25); - sax25->state = AX25_STATE_0; - } - - kfree_skb(skb); - } - skb_queue_purge(&ax25->sk->sk_write_queue); - } - - if (ax25->sk != NULL) { - if (sk_has_allocations(ax25->sk)) { - /* Defer: outstanding buffers */ - timer_setup(&ax25->dtimer, ax25_destroy_timer, 0); - ax25->dtimer.expires = jiffies + 2 * HZ; - add_timer(&ax25->dtimer); - } else { - struct sock *sk=ax25->sk; - ax25->sk=NULL; - sock_put(sk); - } - } else { - ax25_cb_put(ax25); - } -} - -/* - * dl1bke 960311: set parameters for existing AX.25 connections, - * includes a KILL command to abort any connection. 
- * VERY useful for debugging ;-) - */ -static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) -{ - struct ax25_ctl_struct ax25_ctl; - ax25_digi digi; - ax25_dev *ax25_dev; - ax25_cb *ax25; - unsigned int k; - int ret = 0; - - if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) - return -EFAULT; - - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) - return -EINVAL; - - if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) - return -EINVAL; - - ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); - if (!ax25_dev) - return -ENODEV; - - digi.ndigi = ax25_ctl.digi_count; - for (k = 0; k < digi.ndigi; k++) - digi.calls[k] = ax25_ctl.digi_addr[k]; - - ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); - if (!ax25) { - ax25_dev_put(ax25_dev); - return -ENOTCONN; - } - - switch (ax25_ctl.cmd) { - case AX25_KILL: - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); -#ifdef CONFIG_AX25_DAMA_SLAVE - if (ax25_dev->dama.slave && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE) - ax25_dama_off(ax25); -#endif - ax25_disconnect(ax25, ENETRESET); - break; - - case AX25_WINDOW: - if (ax25->modulus == AX25_MODULUS) { - if (ax25_ctl.arg < 1 || ax25_ctl.arg > 7) - goto einval_put; - } else { - if (ax25_ctl.arg < 1 || ax25_ctl.arg > 63) - goto einval_put; - } - ax25->window = ax25_ctl.arg; - break; - - case AX25_T1: - if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ) - goto einval_put; - ax25->rtt = (ax25_ctl.arg * HZ) / 2; - ax25->t1 = ax25_ctl.arg * HZ; - break; - - case AX25_T2: - if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ) - goto einval_put; - ax25->t2 = ax25_ctl.arg * HZ; - break; - - case AX25_N2: - if (ax25_ctl.arg < 1 || ax25_ctl.arg > 31) - goto einval_put; - ax25->n2count = 0; - ax25->n2 = ax25_ctl.arg; - break; - - case AX25_T3: - if (ax25_ctl.arg > ULONG_MAX / HZ) - goto einval_put; - ax25->t3 = ax25_ctl.arg * HZ; - break; - - case AX25_IDLE: - if (ax25_ctl.arg > ULONG_MAX / 
(60 * HZ)) - goto einval_put; - - ax25->idle = ax25_ctl.arg * 60 * HZ; - break; - - case AX25_PACLEN: - if (ax25_ctl.arg < 16 || ax25_ctl.arg > 65535) - goto einval_put; - ax25->paclen = ax25_ctl.arg; - break; - - default: - goto einval_put; - } - -out_put: - ax25_dev_put(ax25_dev); - ax25_cb_put(ax25); - return ret; - -einval_put: - ret = -EINVAL; - goto out_put; -} - -static void ax25_fillin_cb_from_dev(ax25_cb *ax25, const ax25_dev *ax25_dev) -{ - ax25->rtt = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2; - ax25->t1 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]); - ax25->t2 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T2]); - ax25->t3 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T3]); - ax25->n2 = ax25_dev->values[AX25_VALUES_N2]; - ax25->paclen = ax25_dev->values[AX25_VALUES_PACLEN]; - ax25->idle = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_IDLE]); - ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF]; - - if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_EWINDOW]; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_WINDOW]; - } -} - -/* - * Fill in a created AX.25 created control block with the default - * values for a particular device. 
- */ -void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev) -{ - ax25->ax25_dev = ax25_dev; - - if (ax25->ax25_dev != NULL) { - ax25_fillin_cb_from_dev(ax25, ax25_dev); - return; - } - - /* - * No device, use kernel / AX.25 spec default values - */ - ax25->rtt = msecs_to_jiffies(AX25_DEF_T1) / 2; - ax25->t1 = msecs_to_jiffies(AX25_DEF_T1); - ax25->t2 = msecs_to_jiffies(AX25_DEF_T2); - ax25->t3 = msecs_to_jiffies(AX25_DEF_T3); - ax25->n2 = AX25_DEF_N2; - ax25->paclen = AX25_DEF_PACLEN; - ax25->idle = msecs_to_jiffies(AX25_DEF_IDLE); - ax25->backoff = AX25_DEF_BACKOFF; - - if (AX25_DEF_AXDEFMODE) { - ax25->modulus = AX25_EMODULUS; - ax25->window = AX25_DEF_EWINDOW; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = AX25_DEF_WINDOW; - } -} - -/* - * Create an empty AX.25 control block. - */ -ax25_cb *ax25_create_cb(void) -{ - ax25_cb *ax25; - - if ((ax25 = kzalloc_obj(*ax25, GFP_ATOMIC)) == NULL) - return NULL; - - refcount_set(&ax25->refcount, 1); - - skb_queue_head_init(&ax25->write_queue); - skb_queue_head_init(&ax25->frag_queue); - skb_queue_head_init(&ax25->ack_queue); - skb_queue_head_init(&ax25->reseq_queue); - - ax25_setup_timers(ax25); - - ax25_fillin_cb(ax25, NULL); - - ax25->state = AX25_STATE_0; - - return ax25; -} - -/* - * Handling for system calls applied via the various interfaces to an - * AX25 socket object - */ - -static int ax25_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - ax25_cb *ax25; - struct net_device *dev; - char devname[IFNAMSIZ]; - unsigned int opt; - int res = 0; - - if (level != SOL_AX25) - return -ENOPROTOOPT; - - if (optlen < sizeof(unsigned int)) - return -EINVAL; - - if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) - return -EFAULT; - - lock_sock(sk); - ax25 = sk_to_ax25(sk); - - switch (optname) { - case AX25_WINDOW: - if (ax25->modulus == AX25_MODULUS) { - if (opt < 1 || opt > 7) { - res = -EINVAL; - break; - } - } else { - if 
(opt < 1 || opt > 63) { - res = -EINVAL; - break; - } - } - ax25->window = opt; - break; - - case AX25_T1: - if (opt < 1 || opt > UINT_MAX / HZ) { - res = -EINVAL; - break; - } - ax25->rtt = (opt * HZ) >> 1; - ax25->t1 = opt * HZ; - break; - - case AX25_T2: - if (opt < 1 || opt > UINT_MAX / HZ) { - res = -EINVAL; - break; - } - ax25->t2 = opt * HZ; - break; - - case AX25_N2: - if (opt < 1 || opt > 31) { - res = -EINVAL; - break; - } - ax25->n2 = opt; - break; - - case AX25_T3: - if (opt < 1 || opt > UINT_MAX / HZ) { - res = -EINVAL; - break; - } - ax25->t3 = opt * HZ; - break; - - case AX25_IDLE: - if (opt > UINT_MAX / (60 * HZ)) { - res = -EINVAL; - break; - } - ax25->idle = opt * 60 * HZ; - break; - - case AX25_BACKOFF: - if (opt > 2) { - res = -EINVAL; - break; - } - ax25->backoff = opt; - break; - - case AX25_EXTSEQ: - ax25->modulus = opt ? AX25_EMODULUS : AX25_MODULUS; - break; - - case AX25_PIDINCL: - ax25->pidincl = opt ? 1 : 0; - break; - - case AX25_IAMDIGI: - ax25->iamdigi = opt ? 
1 : 0; - break; - - case AX25_PACLEN: - if (opt < 16 || opt > 65535) { - res = -EINVAL; - break; - } - ax25->paclen = opt; - break; - - case SO_BINDTODEVICE: - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - - memset(devname, 0, sizeof(devname)); - - if (copy_from_sockptr(devname, optval, optlen)) { - res = -EFAULT; - break; - } - - if (sk->sk_type == SOCK_SEQPACKET && - (sock->state != SS_UNCONNECTED || - sk->sk_state == TCP_LISTEN)) { - res = -EADDRNOTAVAIL; - break; - } - - rcu_read_lock(); - dev = dev_get_by_name_rcu(&init_net, devname); - if (!dev) { - rcu_read_unlock(); - res = -ENODEV; - break; - } - - if (ax25->ax25_dev) { - if (dev == ax25->ax25_dev->dev) { - rcu_read_unlock(); - break; - } - netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker); - ax25_dev_put(ax25->ax25_dev); - } - - ax25->ax25_dev = ax25_dev_ax25dev(dev); - if (!ax25->ax25_dev) { - rcu_read_unlock(); - res = -ENODEV; - break; - } - ax25_fillin_cb(ax25, ax25->ax25_dev); - netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC); - ax25_dev_hold(ax25->ax25_dev); - rcu_read_unlock(); - break; - - default: - res = -ENOPROTOOPT; - } - release_sock(sk); - - return res; -} - -static int ax25_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - ax25_cb *ax25; - struct ax25_dev *ax25_dev; - char devname[IFNAMSIZ]; - void *valptr; - int val = 0; - int maxlen, length; - - if (level != SOL_AX25) - return -ENOPROTOOPT; - - if (get_user(maxlen, optlen)) - return -EFAULT; - - if (maxlen < 1) - return -EFAULT; - - valptr = &val; - length = min_t(unsigned int, maxlen, sizeof(int)); - - lock_sock(sk); - ax25 = sk_to_ax25(sk); - - switch (optname) { - case AX25_WINDOW: - val = ax25->window; - break; - - case AX25_T1: - val = ax25->t1 / HZ; - break; - - case AX25_T2: - val = ax25->t2 / HZ; - break; - - case AX25_N2: - val = ax25->n2; - break; - - case AX25_T3: - val = ax25->t3 / HZ; - break; - - case AX25_IDLE: - val = ax25->idle 
/ (60 * HZ); - break; - - case AX25_BACKOFF: - val = ax25->backoff; - break; - - case AX25_EXTSEQ: - val = (ax25->modulus == AX25_EMODULUS); - break; - - case AX25_PIDINCL: - val = ax25->pidincl; - break; - - case AX25_IAMDIGI: - val = ax25->iamdigi; - break; - - case AX25_PACLEN: - val = ax25->paclen; - break; - - case SO_BINDTODEVICE: - ax25_dev = ax25->ax25_dev; - - if (ax25_dev != NULL && ax25_dev->dev != NULL) { - strscpy(devname, ax25_dev->dev->name, sizeof(devname)); - length = strlen(devname) + 1; - } else { - *devname = '\0'; - length = 1; - } - - valptr = devname; - break; - - default: - release_sock(sk); - return -ENOPROTOOPT; - } - release_sock(sk); - - if (put_user(length, optlen)) - return -EFAULT; - - return copy_to_user(optval, valptr, length) ? -EFAULT : 0; -} - -static int ax25_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int res = 0; - - lock_sock(sk); - if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_LISTEN) { - sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; - goto out; - } - res = -EOPNOTSUPP; - -out: - release_sock(sk); - - return res; -} - -/* - * XXX: when creating ax25_sock we should update the .obj_size setting - * below. 
- */ -static struct proto ax25_proto = { - .name = "AX25", - .owner = THIS_MODULE, - .obj_size = sizeof(struct ax25_sock), -}; - -static int ax25_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - ax25_cb *ax25; - - if (protocol < 0 || protocol > U8_MAX) - return -EINVAL; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_DGRAM: - if (protocol == 0 || protocol == PF_AX25) - protocol = AX25_P_TEXT; - break; - - case SOCK_SEQPACKET: - switch (protocol) { - case 0: - case PF_AX25: /* For CLX */ - protocol = AX25_P_TEXT; - break; - case AX25_P_SEGMENT: -#ifdef CONFIG_INET - case AX25_P_ARP: - case AX25_P_IP: -#endif -#ifdef CONFIG_NETROM - case AX25_P_NETROM: -#endif -#ifdef CONFIG_ROSE - case AX25_P_ROSE: -#endif - return -ESOCKTNOSUPPORT; -#ifdef CONFIG_NETROM_MODULE - case AX25_P_NETROM: - if (ax25_protocol_is_registered(AX25_P_NETROM)) - return -ESOCKTNOSUPPORT; - break; -#endif -#ifdef CONFIG_ROSE_MODULE - case AX25_P_ROSE: - if (ax25_protocol_is_registered(AX25_P_ROSE)) - return -ESOCKTNOSUPPORT; - break; -#endif - default: - break; - } - break; - - case SOCK_RAW: - if (!capable(CAP_NET_RAW)) - return -EPERM; - break; - default: - return -ESOCKTNOSUPPORT; - } - - sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern); - if (sk == NULL) - return -ENOMEM; - - ax25 = ax25_sk(sk)->cb = ax25_create_cb(); - if (!ax25) { - sk_free(sk); - return -ENOMEM; - } - - sock_init_data(sock, sk); - - sk->sk_destruct = ax25_free_sock; - sock->ops = &ax25_proto_ops; - sk->sk_protocol = protocol; - - ax25->sk = sk; - - return 0; -} - -struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) -{ - struct sock *sk; - ax25_cb *ax25, *oax25; - - sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0); - if (sk == NULL) - return NULL; - - if ((ax25 = ax25_create_cb()) == NULL) { - sk_free(sk); - return NULL; - } - - switch (osk->sk_type) { - case SOCK_DGRAM: - break; - 
case SOCK_SEQPACKET: - break; - default: - sk_free(sk); - ax25_cb_put(ax25); - return NULL; - } - - sock_init_data(NULL, sk); - - sk->sk_type = osk->sk_type; - sk->sk_priority = READ_ONCE(osk->sk_priority); - sk->sk_protocol = osk->sk_protocol; - sk->sk_rcvbuf = osk->sk_rcvbuf; - sk->sk_sndbuf = osk->sk_sndbuf; - sk->sk_state = TCP_ESTABLISHED; - sock_copy_flags(sk, osk); - - oax25 = sk_to_ax25(osk); - - ax25->modulus = oax25->modulus; - ax25->backoff = oax25->backoff; - ax25->pidincl = oax25->pidincl; - ax25->iamdigi = oax25->iamdigi; - ax25->rtt = oax25->rtt; - ax25->t1 = oax25->t1; - ax25->t2 = oax25->t2; - ax25->t3 = oax25->t3; - ax25->n2 = oax25->n2; - ax25->idle = oax25->idle; - ax25->paclen = oax25->paclen; - ax25->window = oax25->window; - - ax25->ax25_dev = ax25_dev; - ax25->source_addr = oax25->source_addr; - - if (oax25->digipeat != NULL) { - ax25->digipeat = kmemdup(oax25->digipeat, sizeof(ax25_digi), - GFP_ATOMIC); - if (ax25->digipeat == NULL) { - sk_free(sk); - ax25_cb_put(ax25); - return NULL; - } - } - - ax25_sk(sk)->cb = ax25; - sk->sk_destruct = ax25_free_sock; - ax25->sk = sk; - - return sk; -} - -static int ax25_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - ax25_cb *ax25; - ax25_dev *ax25_dev; - - if (sk == NULL) - return 0; - - sock_hold(sk); - lock_sock(sk); - sock_orphan(sk); - ax25 = sk_to_ax25(sk); - ax25_dev = ax25->ax25_dev; - - if (sk->sk_type == SOCK_SEQPACKET) { - switch (ax25->state) { - case AX25_STATE_0: - if (!sock_flag(ax25->sk, SOCK_DEAD)) { - release_sock(sk); - ax25_disconnect(ax25, 0); - lock_sock(sk); - } - ax25_destroy_socket(ax25); - break; - - case AX25_STATE_1: - case AX25_STATE_2: - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - release_sock(sk); - ax25_disconnect(ax25, 0); - lock_sock(sk); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_destroy_socket(ax25); - break; - - case AX25_STATE_3: - case AX25_STATE_4: - ax25_clear_queues(ax25); - ax25->n2count = 0; - - switch 
(ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_send_control(ax25, - AX25_DISC, - AX25_POLLON, - AX25_COMMAND); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); - break; -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); - break; -#endif - } - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - ax25->state = AX25_STATE_2; - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DESTROY); - break; - - default: - break; - } - } else { - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - ax25_destroy_socket(ax25); - } - if (ax25_dev) { - if (!ax25_dev->device_up) { - timer_delete_sync(&ax25->timer); - timer_delete_sync(&ax25->t1timer); - timer_delete_sync(&ax25->t2timer); - timer_delete_sync(&ax25->t3timer); - timer_delete_sync(&ax25->idletimer); - } - netdev_put(ax25_dev->dev, &ax25->dev_tracker); - ax25_dev_put(ax25_dev); - } - - sock->sk = NULL; - release_sock(sk); - sock_put(sk); - - return 0; -} - -/* - * We support a funny extension here so you can (as root) give any callsign - * digipeated via a local address as source. This hack is obsolete now - * that we've implemented support for SO_BINDTODEVICE. It is however small - * and trivially backward compatible. - */ -static int ax25_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; - ax25_dev *ax25_dev = NULL; - ax25_uid_assoc *user; - ax25_address call; - ax25_cb *ax25; - int err = 0; - - if (addr_len != sizeof(struct sockaddr_ax25) && - addr_len != sizeof(struct full_sockaddr_ax25)) - /* support for old structure may go away some time - * ax25_bind(): uses old (6 digipeater) socket structure. 
- */ - if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || - (addr_len > sizeof(struct full_sockaddr_ax25))) - return -EINVAL; - - if (addr->fsa_ax25.sax25_family != AF_AX25) - return -EINVAL; - - user = ax25_findbyuid(current_euid()); - if (user) { - call = user->call; - ax25_uid_put(user); - } else { - if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) - return -EACCES; - - call = addr->fsa_ax25.sax25_call; - } - - lock_sock(sk); - - ax25 = sk_to_ax25(sk); - if (!sock_flag(sk, SOCK_ZAPPED)) { - err = -EINVAL; - goto out; - } - - ax25->source_addr = call; - - /* - * User already set interface with SO_BINDTODEVICE - */ - if (ax25->ax25_dev != NULL) - goto done; - - if (addr_len > sizeof(struct sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) { - if (ax25cmp(&addr->fsa_digipeater[0], &null_ax25_address) != 0 && - (ax25_dev = ax25_addr_ax25dev(&addr->fsa_digipeater[0])) == NULL) { - err = -EADDRNOTAVAIL; - goto out; - } - } else { - if ((ax25_dev = ax25_addr_ax25dev(&addr->fsa_ax25.sax25_call)) == NULL) { - err = -EADDRNOTAVAIL; - goto out; - } - } - - if (ax25_dev) { - ax25_fillin_cb(ax25, ax25_dev); - netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); - } - -done: - ax25_cb_add(ax25); - sock_reset_flag(sk, SOCK_ZAPPED); - -out: - release_sock(sk); - - return err; -} - -/* - * FIXME: nonblock behaviour looks like it may have a bug. - */ -static int __must_check ax25_connect(struct socket *sock, - struct sockaddr_unsized *uaddr, int addr_len, int flags) -{ - struct sock *sk = sock->sk; - ax25_cb *ax25 = sk_to_ax25(sk), *ax25t; - struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; - ax25_digi *digi = NULL; - int ct = 0, err = 0; - - /* - * some sanity checks. 
code further down depends on this - */ - - if (addr_len == sizeof(struct sockaddr_ax25)) - /* support for this will go away in early 2.5.x - * ax25_connect(): uses obsolete socket structure - */ - ; - else if (addr_len != sizeof(struct full_sockaddr_ax25)) - /* support for old structure may go away some time - * ax25_connect(): uses old (6 digipeater) socket structure. - */ - if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || - (addr_len > sizeof(struct full_sockaddr_ax25))) - return -EINVAL; - - - if (fsa->fsa_ax25.sax25_family != AF_AX25) - return -EINVAL; - - lock_sock(sk); - - /* deal with restarts */ - if (sock->state == SS_CONNECTING) { - switch (sk->sk_state) { - case TCP_SYN_SENT: /* still trying */ - err = -EINPROGRESS; - goto out_release; - - case TCP_ESTABLISHED: /* connection established */ - sock->state = SS_CONNECTED; - goto out_release; - - case TCP_CLOSE: /* connection refused */ - sock->state = SS_UNCONNECTED; - err = -ECONNREFUSED; - goto out_release; - } - } - - if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) { - err = -EISCONN; /* No reconnect on a seqpacket socket */ - goto out_release; - } - - sk->sk_state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; - - kfree(ax25->digipeat); - ax25->digipeat = NULL; - - /* - * Handle digi-peaters to be used. - */ - if (addr_len > sizeof(struct sockaddr_ax25) && - fsa->fsa_ax25.sax25_ndigis != 0) { - /* Valid number of digipeaters ? 
*/ - if (fsa->fsa_ax25.sax25_ndigis < 1 || - fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS || - addr_len < sizeof(struct sockaddr_ax25) + - sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { - err = -EINVAL; - goto out_release; - } - - if ((digi = kmalloc_obj(ax25_digi)) == NULL) { - err = -ENOBUFS; - goto out_release; - } - - digi->ndigi = fsa->fsa_ax25.sax25_ndigis; - digi->lastrepeat = -1; - - while (ct < fsa->fsa_ax25.sax25_ndigis) { - if ((fsa->fsa_digipeater[ct].ax25_call[6] & - AX25_HBIT) && ax25->iamdigi) { - digi->repeated[ct] = 1; - digi->lastrepeat = ct; - } else { - digi->repeated[ct] = 0; - } - digi->calls[ct] = fsa->fsa_digipeater[ct]; - ct++; - } - } - - /* Must bind first - autobinding does not work. */ - if (sock_flag(sk, SOCK_ZAPPED)) { - kfree(digi); - err = -EINVAL; - goto out_release; - } - - /* Check to see if the device has been filled in, error if it hasn't. */ - if (ax25->ax25_dev == NULL) { - kfree(digi); - err = -EHOSTUNREACH; - goto out_release; - } - - if (sk->sk_type == SOCK_SEQPACKET && - (ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi, - ax25->ax25_dev->dev))) { - kfree(digi); - err = -EADDRINUSE; /* Already such a connection */ - ax25_cb_put(ax25t); - goto out_release; - } - - ax25->dest_addr = fsa->fsa_ax25.sax25_call; - ax25->digipeat = digi; - - /* First the easy one */ - if (sk->sk_type != SOCK_SEQPACKET) { - sock->state = SS_CONNECTED; - sk->sk_state = TCP_ESTABLISHED; - goto out_release; - } - - /* Move to connecting socket, ax.25 lapb WAIT_UA.. 
*/ - sock->state = SS_CONNECTING; - sk->sk_state = TCP_SYN_SENT; - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_establish_data_link(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - if (ax25->ax25_dev->dama.slave) - ax25_ds_establish_data_link(ax25); - else - ax25_std_establish_data_link(ax25); - break; -#endif - } - - ax25->state = AX25_STATE_1; - - ax25_start_heartbeat(ax25); - - /* Now the loop */ - if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { - err = -EINPROGRESS; - goto out_release; - } - - if (sk->sk_state == TCP_SYN_SENT) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - if (sk->sk_state != TCP_SYN_SENT) - break; - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - - if (err) - goto out_release; - } - - if (sk->sk_state != TCP_ESTABLISHED) { - /* Not in ABM, not in WAIT_UA -> failed */ - sock->state = SS_UNCONNECTED; - err = sock_error(sk); /* Always set at this point */ - goto out_release; - } - - sock->state = SS_CONNECTED; - - err = 0; -out_release: - release_sock(sk); - - return err; -} - -static int ax25_accept(struct socket *sock, struct socket *newsock, - struct proto_accept_arg *arg) -{ - struct sk_buff *skb; - struct sock *newsk; - ax25_dev *ax25_dev; - DEFINE_WAIT(wait); - struct sock *sk; - ax25_cb *ax25; - int err = 0; - - if (sock->state != SS_UNCONNECTED) - return -EINVAL; - - if ((sk = sock->sk) == NULL) - return -EINVAL; - - lock_sock(sk); - if (sk->sk_type != SOCK_SEQPACKET) { - err = -EOPNOTSUPP; - goto out; - } - - if (sk->sk_state != TCP_LISTEN) { - err = -EINVAL; - goto out; - } - - /* - * The read queue this time is holding sockets ready to use 
- * hooked into the SABM we saved - */ - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb) - break; - - if (arg->flags & O_NONBLOCK) { - err = -EWOULDBLOCK; - break; - } - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - - if (err) - goto out; - - newsk = skb->sk; - sock_graft(newsk, newsock); - - /* Now attach up the new socket */ - kfree_skb(skb); - sk_acceptq_removed(sk); - newsock->state = SS_CONNECTED; - ax25 = sk_to_ax25(newsk); - ax25_dev = ax25->ax25_dev; - netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); - ax25_dev_hold(ax25_dev); - -out: - release_sock(sk); - - return err; -} - -static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; - struct sock *sk = sock->sk; - unsigned char ndigi, i; - ax25_cb *ax25; - int err = 0; - - memset(fsa, 0, sizeof(*fsa)); - lock_sock(sk); - ax25 = sk_to_ax25(sk); - - if (peer != 0) { - if (sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - - fsa->fsa_ax25.sax25_family = AF_AX25; - fsa->fsa_ax25.sax25_call = ax25->dest_addr; - - if (ax25->digipeat != NULL) { - ndigi = ax25->digipeat->ndigi; - fsa->fsa_ax25.sax25_ndigis = ndigi; - for (i = 0; i < ndigi; i++) - fsa->fsa_digipeater[i] = - ax25->digipeat->calls[i]; - } - } else { - fsa->fsa_ax25.sax25_family = AF_AX25; - fsa->fsa_ax25.sax25_call = ax25->source_addr; - fsa->fsa_ax25.sax25_ndigis = 1; - if (ax25->ax25_dev != NULL) { - memcpy(&fsa->fsa_digipeater[0], - ax25->ax25_dev->dev->dev_addr, AX25_ADDR_LEN); - } else { - fsa->fsa_digipeater[0] = null_ax25_address; - } - } - err = sizeof (struct full_sockaddr_ax25); - -out: - release_sock(sk); - - return err; -} - -static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -{ - 
DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); - struct sock *sk = sock->sk; - struct sockaddr_ax25 sax; - struct sk_buff *skb; - ax25_digi dtmp, *dp; - ax25_cb *ax25; - size_t size; - int lv, err, addr_len = msg->msg_namelen; - - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) - return -EINVAL; - - lock_sock(sk); - ax25 = sk_to_ax25(sk); - - if (sock_flag(sk, SOCK_ZAPPED)) { - err = -EADDRNOTAVAIL; - goto out; - } - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - err = -EPIPE; - goto out; - } - - if (ax25->ax25_dev == NULL) { - err = -ENETUNREACH; - goto out; - } - - if (len > ax25->ax25_dev->dev->mtu) { - err = -EMSGSIZE; - goto out; - } - - if (usax != NULL) { - if (usax->sax25_family != AF_AX25) { - err = -EINVAL; - goto out; - } - - if (addr_len == sizeof(struct sockaddr_ax25)) - /* ax25_sendmsg(): uses obsolete socket structure */ - ; - else if (addr_len != sizeof(struct full_sockaddr_ax25)) - /* support for old structure may go away some time - * ax25_sendmsg(): uses old (6 digipeater) - * socket structure. - */ - if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) || - (addr_len > sizeof(struct full_sockaddr_ax25))) { - err = -EINVAL; - goto out; - } - - - if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) { - int ct = 0; - struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; - - /* Valid number of digipeaters ? 
*/ - if (usax->sax25_ndigis < 1 || - usax->sax25_ndigis > AX25_MAX_DIGIS || - addr_len < sizeof(struct sockaddr_ax25) + - sizeof(ax25_address) * usax->sax25_ndigis) { - err = -EINVAL; - goto out; - } - - dtmp.ndigi = usax->sax25_ndigis; - - while (ct < usax->sax25_ndigis) { - dtmp.repeated[ct] = 0; - dtmp.calls[ct] = fsa->fsa_digipeater[ct]; - ct++; - } - - dtmp.lastrepeat = 0; - } - - sax = *usax; - if (sk->sk_type == SOCK_SEQPACKET && - ax25cmp(&ax25->dest_addr, &sax.sax25_call)) { - err = -EISCONN; - goto out; - } - if (usax->sax25_ndigis == 0) - dp = NULL; - else - dp = &dtmp; - } else { - /* - * FIXME: 1003.1g - if the socket is like this because - * it has become closed (not started closed) and is VC - * we ought to SIGPIPE, EPIPE - */ - if (sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - sax.sax25_family = AF_AX25; - sax.sax25_call = ax25->dest_addr; - dp = ax25->digipeat; - } - - /* Build a packet */ - /* Assume the worst case */ - size = len + ax25->ax25_dev->dev->hard_header_len; - - skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, &err); - if (skb == NULL) - goto out; - - skb_reserve(skb, size - len); - - /* User data follows immediately after the AX.25 data */ - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - err = -EFAULT; - kfree_skb(skb); - goto out; - } - - skb_reset_network_header(skb); - - /* Add the PID if one is not supplied by the user in the skb */ - if (!ax25->pidincl) - *(u8 *)skb_push(skb, 1) = sk->sk_protocol; - - if (sk->sk_type == SOCK_SEQPACKET) { - /* Connected mode sockets go via the LAPB machine */ - if (sk->sk_state != TCP_ESTABLISHED) { - kfree_skb(skb); - err = -ENOTCONN; - goto out; - } - - /* Shove it onto the queue and kick */ - ax25_output(ax25, ax25->paclen, skb); - - err = len; - goto out; - } - - skb_push(skb, 1 + ax25_addr_size(dp)); - - /* Building AX.25 Header */ - - /* Build an AX.25 header */ - lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call, - dp, 
AX25_COMMAND, AX25_MODULUS); - - skb_set_transport_header(skb, lv); - - *skb_transport_header(skb) = AX25_UI; - - /* Datagram frames go straight out of the door as UI */ - ax25_queue_xmit(skb, ax25->ax25_dev->dev); - - err = len; - -out: - release_sock(sk); - - return err; -} - -static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) -{ - struct sock *sk = sock->sk; - struct sk_buff *skb, *last; - struct sk_buff_head *sk_queue; - int copied; - int err = 0; - int off = 0; - long timeo; - - lock_sock(sk); - /* - * This works for seqpacket too. The receiver has ordered the - * queue for us! We do one quick check first though - */ - if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - - /* We need support for non-blocking reads. */ - sk_queue = &sk->sk_receive_queue; - skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last); - /* If no packet is available, release_sock(sk) and try again. 
*/ - if (!skb) { - if (err != -EAGAIN) - goto out; - release_sock(sk); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err, - &timeo, last)) { - skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, - &err, &last); - if (skb) - break; - - if (err != -EAGAIN) - goto done; - } - if (!skb) - goto done; - lock_sock(sk); - } - - if (!sk_to_ax25(sk)->pidincl) - skb_pull(skb, 1); /* Remove PID */ - - skb_reset_transport_header(skb); - copied = skb->len; - - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - - skb_copy_datagram_msg(skb, 0, msg, copied); - - if (msg->msg_name) { - ax25_digi digi; - ax25_address src; - const unsigned char *mac = skb_mac_header(skb); - DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); - - memset(sax, 0, sizeof(struct full_sockaddr_ax25)); - ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, - &digi, NULL, NULL); - sax->sax25_family = AF_AX25; - /* We set this correctly, even though we may not let the - application know the digi calls further down (because it - did NOT ask to know them). This could get political... 
**/ - sax->sax25_ndigis = digi.ndigi; - sax->sax25_call = src; - - if (sax->sax25_ndigis != 0) { - int ct; - struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)sax; - - for (ct = 0; ct < digi.ndigi; ct++) - fsa->fsa_digipeater[ct] = digi.calls[ct]; - } - msg->msg_namelen = sizeof(struct full_sockaddr_ax25); - } - - skb_free_datagram(sk, skb); - err = copied; - -out: - release_sock(sk); - -done: - return err; -} - -static int ax25_shutdown(struct socket *sk, int how) -{ - /* FIXME - generate DM and RNR states */ - return -EOPNOTSUPP; -} - -static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - void __user *argp = (void __user *)arg; - int res = 0; - - lock_sock(sk); - switch (cmd) { - case TIOCOUTQ: { - long amount; - - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - res = put_user(amount, (int __user *)argp); - break; - } - - case TIOCINQ: { - struct sk_buff *skb; - long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - res = put_user(amount, (int __user *) argp); - break; - } - - case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */ - case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */ - case SIOCAX25GETUID: { - struct sockaddr_ax25 sax25; - if (copy_from_user(&sax25, argp, sizeof(sax25))) { - res = -EFAULT; - break; - } - res = ax25_uid_ioctl(cmd, &sax25); - break; - } - - case SIOCAX25NOUID: { /* Set the default policy (default/bar) */ - long amount; - if (!capable(CAP_NET_ADMIN)) { - res = -EPERM; - break; - } - if (get_user(amount, (long __user *)argp)) { - res = -EFAULT; - break; - } - if (amount < 0 || amount > AX25_NOUID_BLOCK) { - res = -EINVAL; - break; - } - ax25_uid_policy = amount; - res = 0; - break; - } - - case SIOCADDRT: - case SIOCDELRT: - case SIOCAX25OPTRT: - if (!capable(CAP_NET_ADMIN)) { - res = 
-EPERM; - break; - } - res = ax25_rt_ioctl(cmd, argp); - break; - - case SIOCAX25CTLCON: - if (!capable(CAP_NET_ADMIN)) { - res = -EPERM; - break; - } - res = ax25_ctl_ioctl(cmd, argp); - break; - - case SIOCAX25GETINFO: - case SIOCAX25GETINFOOLD: { - ax25_cb *ax25 = sk_to_ax25(sk); - struct ax25_info_struct ax25_info; - - ax25_info.t1 = ax25->t1 / HZ; - ax25_info.t2 = ax25->t2 / HZ; - ax25_info.t3 = ax25->t3 / HZ; - ax25_info.idle = ax25->idle / (60 * HZ); - ax25_info.n2 = ax25->n2; - ax25_info.t1timer = ax25_display_timer(&ax25->t1timer) / HZ; - ax25_info.t2timer = ax25_display_timer(&ax25->t2timer) / HZ; - ax25_info.t3timer = ax25_display_timer(&ax25->t3timer) / HZ; - ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ); - ax25_info.n2count = ax25->n2count; - ax25_info.state = ax25->state; - ax25_info.rcv_q = sk_rmem_alloc_get(sk); - ax25_info.snd_q = sk_wmem_alloc_get(sk); - ax25_info.vs = ax25->vs; - ax25_info.vr = ax25->vr; - ax25_info.va = ax25->va; - ax25_info.vs_max = ax25->vs; /* reserved */ - ax25_info.paclen = ax25->paclen; - ax25_info.window = ax25->window; - - /* old structure? 
*/ - if (cmd == SIOCAX25GETINFOOLD) { - static int warned = 0; - if (!warned) { - printk(KERN_INFO "%s uses old SIOCAX25GETINFO\n", - current->comm); - warned=1; - } - - if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct_deprecated))) { - res = -EFAULT; - break; - } - } else { - if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct))) { - res = -EINVAL; - break; - } - } - res = 0; - break; - } - - case SIOCAX25ADDFWD: - case SIOCAX25DELFWD: { - struct ax25_fwd_struct ax25_fwd; - if (!capable(CAP_NET_ADMIN)) { - res = -EPERM; - break; - } - if (copy_from_user(&ax25_fwd, argp, sizeof(ax25_fwd))) { - res = -EFAULT; - break; - } - res = ax25_fwd_ioctl(cmd, &ax25_fwd); - break; - } - - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - res = -EINVAL; - break; - - default: - res = -ENOIOCTLCMD; - break; - } - release_sock(sk); - - return res; -} - -#ifdef CONFIG_PROC_FS - -static void *ax25_info_start(struct seq_file *seq, loff_t *pos) - __acquires(ax25_list_lock) -{ - spin_lock_bh(&ax25_list_lock); - return seq_hlist_start(&ax25_list, *pos); -} - -static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &ax25_list, pos); -} - -static void ax25_info_stop(struct seq_file *seq, void *v) - __releases(ax25_list_lock) -{ - spin_unlock_bh(&ax25_list_lock); -} - -static int ax25_info_show(struct seq_file *seq, void *v) -{ - ax25_cb *ax25 = hlist_entry(v, struct ax25_cb, ax25_node); - char buf[11]; - int k; - - - /* - * New format: - * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode - */ - - seq_printf(seq, "%p %s %s%s ", - ax25, - ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name, - ax2asc(buf, &ax25->source_addr), - ax25->iamdigi? 
"*":""); - seq_printf(seq, "%s", ax2asc(buf, &ax25->dest_addr)); - - for (k=0; (ax25->digipeat != NULL) && (k < ax25->digipeat->ndigi); k++) { - seq_printf(seq, ",%s%s", - ax2asc(buf, &ax25->digipeat->calls[k]), - ax25->digipeat->repeated[k]? "*":""); - } - - seq_printf(seq, " %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %d %d", - ax25->state, - ax25->vs, ax25->vr, ax25->va, - ax25_display_timer(&ax25->t1timer) / HZ, ax25->t1 / HZ, - ax25_display_timer(&ax25->t2timer) / HZ, ax25->t2 / HZ, - ax25_display_timer(&ax25->t3timer) / HZ, ax25->t3 / HZ, - ax25_display_timer(&ax25->idletimer) / (60 * HZ), - ax25->idle / (60 * HZ), - ax25->n2count, ax25->n2, - ax25->rtt / HZ, - ax25->window, - ax25->paclen); - - if (ax25->sk != NULL) { - seq_printf(seq, " %d %d %llu\n", - sk_wmem_alloc_get(ax25->sk), - sk_rmem_alloc_get(ax25->sk), - sock_i_ino(ax25->sk)); - } else { - seq_puts(seq, " * * *\n"); - } - return 0; -} - -static const struct seq_operations ax25_info_seqops = { - .start = ax25_info_start, - .next = ax25_info_next, - .stop = ax25_info_stop, - .show = ax25_info_show, -}; -#endif - -static const struct net_proto_family ax25_family_ops = { - .family = PF_AX25, - .create = ax25_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops ax25_proto_ops = { - .family = PF_AX25, - .owner = THIS_MODULE, - .release = ax25_release, - .bind = ax25_bind, - .connect = ax25_connect, - .socketpair = sock_no_socketpair, - .accept = ax25_accept, - .getname = ax25_getname, - .poll = datagram_poll, - .ioctl = ax25_ioctl, - .gettstamp = sock_gettstamp, - .listen = ax25_listen, - .shutdown = ax25_shutdown, - .setsockopt = ax25_setsockopt, - .getsockopt = ax25_getsockopt, - .sendmsg = ax25_sendmsg, - .recvmsg = ax25_recvmsg, - .mmap = sock_no_mmap, -}; - -/* - * Called by socket.c on kernel start up - */ -static struct packet_type ax25_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_AX25), - .func = ax25_kiss_rcv, -}; - -static struct notifier_block 
ax25_dev_notifier = { - .notifier_call = ax25_device_event, -}; - -static int __init ax25_init(void) -{ - int rc = proto_register(&ax25_proto, 0); - - if (rc != 0) - goto out; - - sock_register(&ax25_family_ops); - dev_add_pack(&ax25_packet_type); - register_netdevice_notifier(&ax25_dev_notifier); - - proc_create_seq("ax25_route", 0444, init_net.proc_net, &ax25_rt_seqops); - proc_create_seq("ax25", 0444, init_net.proc_net, &ax25_info_seqops); - proc_create_seq("ax25_calls", 0444, init_net.proc_net, - &ax25_uid_seqops); -out: - return rc; -} -module_init(ax25_init); - - -MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>"); -MODULE_DESCRIPTION("The amateur radio AX.25 link layer protocol"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_AX25); - -static void __exit ax25_exit(void) -{ - remove_proc_entry("ax25_route", init_net.proc_net); - remove_proc_entry("ax25", init_net.proc_net); - remove_proc_entry("ax25_calls", init_net.proc_net); - - unregister_netdevice_notifier(&ax25_dev_notifier); - - dev_remove_pack(&ax25_packet_type); - - sock_unregister(PF_AX25); - proto_unregister(&ax25_proto); - - ax25_rt_free(); - ax25_uid_free(); - ax25_dev_free(); -} -module_exit(ax25_exit); diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c deleted file mode 100644 index f68865a4d0ab..000000000000 --- a/net/ax25/ax25_addr.c +++ /dev/null @@ -1,303 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include 
<linux/interrupt.h> - -/* - * The default broadcast address of an interface is QST-0; the default address - * is LINUX-1. The null address is defined as a callsign of all spaces with - * an SSID of zero. - */ - -const ax25_address ax25_bcast = - {{'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; -const ax25_address ax25_defaddr = - {{'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, 1 << 1}}; -const ax25_address null_ax25_address = - {{' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; - -EXPORT_SYMBOL_GPL(ax25_bcast); -EXPORT_SYMBOL_GPL(ax25_defaddr); -EXPORT_SYMBOL(null_ax25_address); - -/* - * ax25 -> ascii conversion - */ -char *ax2asc(char *buf, const ax25_address *a) -{ - char c, *s; - int n; - - for (n = 0, s = buf; n < 6; n++) { - c = (a->ax25_call[n] >> 1) & 0x7F; - - if (c != ' ') *s++ = c; - } - - *s++ = '-'; - - if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { - *s++ = '1'; - n -= 10; - } - - *s++ = n + '0'; - *s++ = '\0'; - - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; - -} - -EXPORT_SYMBOL(ax2asc); - -/* - * ascii -> ax25 conversion - */ -void asc2ax(ax25_address *addr, const char *callsign) -{ - const char *s; - int n; - - for (s = callsign, n = 0; n < 6; n++) { - if (*s != '\0' && *s != '-') - addr->ax25_call[n] = *s++; - else - addr->ax25_call[n] = ' '; - addr->ax25_call[n] <<= 1; - addr->ax25_call[n] &= 0xFE; - } - - if (*s++ == '\0') { - addr->ax25_call[6] = 0x00; - return; - } - - addr->ax25_call[6] = *s++ - '0'; - - if (*s != '\0') { - addr->ax25_call[6] *= 10; - addr->ax25_call[6] += *s++ - '0'; - } - - addr->ax25_call[6] <<= 1; - addr->ax25_call[6] &= 0x1E; -} - -EXPORT_SYMBOL(asc2ax); - -/* - * Compare two ax.25 addresses - */ -int ax25cmp(const ax25_address *a, const ax25_address *b) -{ - int ct = 0; - - while (ct < 6) { - if ((a->ax25_call[ct] & 0xFE) != (b->ax25_call[ct] & 0xFE)) /* Clean off repeater bits */ - return 1; - ct++; - } - - if ((a->ax25_call[ct] & 0x1E) == 
(b->ax25_call[ct] & 0x1E)) /* SSID without control bit */ - return 0; - - return 2; /* Partial match */ -} - -EXPORT_SYMBOL(ax25cmp); - -/* - * Compare two AX.25 digipeater paths. - */ -int ax25digicmp(const ax25_digi *digi1, const ax25_digi *digi2) -{ - int i; - - if (digi1->ndigi != digi2->ndigi) - return 1; - - if (digi1->lastrepeat != digi2->lastrepeat) - return 1; - - for (i = 0; i < digi1->ndigi; i++) - if (ax25cmp(&digi1->calls[i], &digi2->calls[i]) != 0) - return 1; - - return 0; -} - -/* - * Given an AX.25 address pull of to, from, digi list, command/response and the start of data - * - */ -const unsigned char *ax25_addr_parse(const unsigned char *buf, int len, - ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, - int *dama) -{ - int d = 0; - - if (len < 14) return NULL; - - if (flags != NULL) { - *flags = 0; - - if (buf[6] & AX25_CBIT) - *flags = AX25_COMMAND; - if (buf[13] & AX25_CBIT) - *flags = AX25_RESPONSE; - } - - if (dama != NULL) - *dama = ~buf[13] & AX25_DAMA_FLAG; - - /* Copy to, from */ - if (dest != NULL) - memcpy(dest, buf + 0, AX25_ADDR_LEN); - if (src != NULL) - memcpy(src, buf + 7, AX25_ADDR_LEN); - - buf += 2 * AX25_ADDR_LEN; - len -= 2 * AX25_ADDR_LEN; - - digi->lastrepeat = -1; - digi->ndigi = 0; - - while (!(buf[-1] & AX25_EBIT)) { - if (d >= AX25_MAX_DIGIS) - return NULL; - if (len < AX25_ADDR_LEN) - return NULL; - - memcpy(&digi->calls[d], buf, AX25_ADDR_LEN); - digi->ndigi = d + 1; - - if (buf[6] & AX25_HBIT) { - digi->repeated[d] = 1; - digi->lastrepeat = d; - } else { - digi->repeated[d] = 0; - } - - buf += AX25_ADDR_LEN; - len -= AX25_ADDR_LEN; - d++; - } - - return buf; -} - -/* - * Assemble an AX.25 header from the bits - */ -int ax25_addr_build(unsigned char *buf, const ax25_address *src, - const ax25_address *dest, const ax25_digi *d, int flag, int modulus) -{ - int len = 0; - int ct = 0; - - memcpy(buf, dest, AX25_ADDR_LEN); - buf[6] &= ~(AX25_EBIT | AX25_CBIT); - buf[6] |= AX25_SSSID_SPARE; - - if (flag == 
AX25_COMMAND) buf[6] |= AX25_CBIT; - - buf += AX25_ADDR_LEN; - len += AX25_ADDR_LEN; - - memcpy(buf, src, AX25_ADDR_LEN); - buf[6] &= ~(AX25_EBIT | AX25_CBIT); - buf[6] &= ~AX25_SSSID_SPARE; - - if (modulus == AX25_MODULUS) - buf[6] |= AX25_SSSID_SPARE; - else - buf[6] |= AX25_ESSID_SPARE; - - if (flag == AX25_RESPONSE) buf[6] |= AX25_CBIT; - - /* - * Fast path the normal digiless path - */ - if (d == NULL || d->ndigi == 0) { - buf[6] |= AX25_EBIT; - return 2 * AX25_ADDR_LEN; - } - - buf += AX25_ADDR_LEN; - len += AX25_ADDR_LEN; - - while (ct < d->ndigi) { - memcpy(buf, &d->calls[ct], AX25_ADDR_LEN); - - if (d->repeated[ct]) - buf[6] |= AX25_HBIT; - else - buf[6] &= ~AX25_HBIT; - - buf[6] &= ~AX25_EBIT; - buf[6] |= AX25_SSSID_SPARE; - - buf += AX25_ADDR_LEN; - len += AX25_ADDR_LEN; - ct++; - } - - buf[-1] |= AX25_EBIT; - - return len; -} - -int ax25_addr_size(const ax25_digi *dp) -{ - if (dp == NULL) - return 2 * AX25_ADDR_LEN; - - return AX25_ADDR_LEN * (2 + dp->ndigi); -} - -/* - * Reverse Digipeat List. 
May not pass both parameters as same struct - */ -void ax25_digi_invert(const ax25_digi *in, ax25_digi *out) -{ - int ct; - - out->ndigi = in->ndigi; - out->lastrepeat = in->ndigi - in->lastrepeat - 2; - - /* Invert the digipeaters */ - for (ct = 0; ct < in->ndigi; ct++) { - out->calls[ct] = in->calls[in->ndigi - ct - 1]; - - if (ct <= out->lastrepeat) { - out->calls[ct].ax25_call[6] |= AX25_HBIT; - out->repeated[ct] = 1; - } else { - out->calls[ct].ax25_call[6] &= ~AX25_HBIT; - out->repeated[ct] = 0; - } - } -} diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c deleted file mode 100644 index 3c0544fc4ad5..000000000000 --- a/net/ax25/ax25_dev.c +++ /dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/slab.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/spinlock.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> - -static LIST_HEAD(ax25_dev_list); -DEFINE_SPINLOCK(ax25_dev_lock); - -ax25_dev *ax25_addr_ax25dev(ax25_address *addr) -{ - ax25_dev *ax25_dev, *res = NULL; - - spin_lock_bh(&ax25_dev_lock); - list_for_each_entry(ax25_dev, &ax25_dev_list, list) - if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { - res = ax25_dev; - ax25_dev_hold(ax25_dev); - break; - } - spin_unlock_bh(&ax25_dev_lock); - - return res; -} - -/* - * This is called when an interface is brought up. These are - * reasonable defaults. 
- */ -void ax25_dev_device_up(struct net_device *dev) -{ - ax25_dev *ax25_dev; - - ax25_dev = kzalloc_obj(*ax25_dev); - if (!ax25_dev) { - printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); - return; - } - - refcount_set(&ax25_dev->refcount, 1); - ax25_dev->dev = dev; - netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); - ax25_dev->forward = NULL; - ax25_dev->device_up = true; - - ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; - ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE; - ax25_dev->values[AX25_VALUES_BACKOFF] = AX25_DEF_BACKOFF; - ax25_dev->values[AX25_VALUES_CONMODE] = AX25_DEF_CONMODE; - ax25_dev->values[AX25_VALUES_WINDOW] = AX25_DEF_WINDOW; - ax25_dev->values[AX25_VALUES_EWINDOW] = AX25_DEF_EWINDOW; - ax25_dev->values[AX25_VALUES_T1] = AX25_DEF_T1; - ax25_dev->values[AX25_VALUES_T2] = AX25_DEF_T2; - ax25_dev->values[AX25_VALUES_T3] = AX25_DEF_T3; - ax25_dev->values[AX25_VALUES_IDLE] = AX25_DEF_IDLE; - ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2; - ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN; - ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL; - -#ifdef CONFIG_AX25_DAMA_SLAVE - ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT; - - ax25_ds_setup_timer(ax25_dev); -#endif - - spin_lock_bh(&ax25_dev_lock); - list_add(&ax25_dev->list, &ax25_dev_list); - rcu_assign_pointer(dev->ax25_ptr, ax25_dev); - spin_unlock_bh(&ax25_dev_lock); - - ax25_register_dev_sysctl(ax25_dev); -} - -void ax25_dev_device_down(struct net_device *dev) -{ - ax25_dev *s, *ax25_dev; - - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) - return; - - ax25_unregister_dev_sysctl(ax25_dev); - - spin_lock_bh(&ax25_dev_lock); - -#ifdef CONFIG_AX25_DAMA_SLAVE - timer_shutdown_sync(&ax25_dev->dama.slave_timer); -#endif - - /* - * Remove any packet forwarding that points to this device. 
- */ - list_for_each_entry(s, &ax25_dev_list, list) - if (s->forward == dev) - s->forward = NULL; - - list_for_each_entry(s, &ax25_dev_list, list) { - if (s == ax25_dev) { - list_del(&s->list); - break; - } - } - - RCU_INIT_POINTER(dev->ax25_ptr, NULL); - spin_unlock_bh(&ax25_dev_lock); - netdev_put(dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); -} - -int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) -{ - ax25_dev *ax25_dev, *fwd_dev; - - if ((ax25_dev = ax25_addr_ax25dev(&fwd->port_from)) == NULL) - return -EINVAL; - - switch (cmd) { - case SIOCAX25ADDFWD: - fwd_dev = ax25_addr_ax25dev(&fwd->port_to); - if (!fwd_dev) { - ax25_dev_put(ax25_dev); - return -EINVAL; - } - if (ax25_dev->forward) { - ax25_dev_put(fwd_dev); - ax25_dev_put(ax25_dev); - return -EINVAL; - } - ax25_dev->forward = fwd_dev->dev; - ax25_dev_put(fwd_dev); - ax25_dev_put(ax25_dev); - break; - - case SIOCAX25DELFWD: - if (!ax25_dev->forward) { - ax25_dev_put(ax25_dev); - return -EINVAL; - } - ax25_dev->forward = NULL; - ax25_dev_put(ax25_dev); - break; - - default: - ax25_dev_put(ax25_dev); - return -EINVAL; - } - - return 0; -} - -struct net_device *ax25_fwd_dev(struct net_device *dev) -{ - ax25_dev *ax25_dev; - - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) - return dev; - - if (ax25_dev->forward == NULL) - return dev; - - return ax25_dev->forward; -} - -/* - * Free all memory associated with device structures. 
- */ -void __exit ax25_dev_free(void) -{ - ax25_dev *s, *n; - - spin_lock_bh(&ax25_dev_lock); - list_for_each_entry_safe(s, n, &ax25_dev_list, list) { - netdev_put(s->dev, &s->dev_tracker); - list_del(&s->list); - ax25_dev_put(s); - } - spin_unlock_bh(&ax25_dev_lock); -} diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c deleted file mode 100644 index c62f8fb06189..000000000000 --- a/net/ax25/ax25_ds_in.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -/* - * State machine for state 1, Awaiting Connection State. - * The handling of the timer(s) is in file ax25_ds_timer.c. - * Handling of state 0 and connection release is in ax25.c. 
- */ -static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) -{ - switch (frametype) { - case AX25_SABM: - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - break; - - case AX25_SABME: - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE); - break; - - case AX25_UA: - ax25_calculate_rtt(ax25); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - ax25->vs = 0; - ax25->va = 0; - ax25->vr = 0; - ax25->state = AX25_STATE_3; - ax25->n2count = 0; - if (ax25->sk != NULL) { - bh_lock_sock(ax25->sk); - ax25->sk->sk_state = TCP_ESTABLISHED; - /* - * For WAIT_SABM connections we will produce an accept - * ready socket here - */ - if (!sock_flag(ax25->sk, SOCK_DEAD)) - ax25->sk->sk_state_change(ax25->sk); - bh_unlock_sock(ax25->sk); - } - ax25_dama_on(ax25); - - /* according to DK4EG's spec we are required to - * send a RR RESPONSE FINAL NR=0. - */ - - ax25_std_enquiry_response(ax25); - break; - - case AX25_DM: - if (pf) - ax25_disconnect(ax25, ECONNREFUSED); - break; - - default: - if (pf) - ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND); - break; - } - - return 0; -} - -/* - * State machine for state 2, Awaiting Release State. - * The handling of the timer(s) is in file ax25_ds_timer.c - * Handling of state 0 and connection release is in ax25.c. 
- */ -static int ax25_ds_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) -{ - switch (frametype) { - case AX25_SABM: - case AX25_SABME: - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_dama_off(ax25); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_dama_off(ax25); - ax25_disconnect(ax25, 0); - break; - - case AX25_DM: - case AX25_UA: - if (pf) { - ax25_dama_off(ax25); - ax25_disconnect(ax25, 0); - } - break; - - case AX25_I: - case AX25_REJ: - case AX25_RNR: - case AX25_RR: - if (pf) { - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_dama_off(ax25); - } - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 3, Connected State. - * The handling of the timer(s) is in file ax25_timer.c - * Handling of state 0 and connection release is in ax25.c. - */ -static int ax25_ds_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type) -{ - int queued = 0; - - switch (frametype) { - case AX25_SABM: - case AX25_SABME: - if (frametype == AX25_SABM) { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - } else { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; - } - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - ax25->condition = 0x00; - ax25->vs = 0; - ax25->va = 0; - ax25->vr = 0; - ax25_requeue_frames(ax25); - ax25_dama_on(ax25); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_dama_off(ax25); - ax25_disconnect(ax25, 0); - break; - - case AX25_DM: - ax25_dama_off(ax25); - ax25_disconnect(ax25, ECONNRESET); - break; - - case AX25_RR: - case AX25_RNR: - if (frametype == AX25_RR) - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - else - ax25->condition |= 
AX25_COND_PEER_RX_BUSY; - - if (ax25_validate_nr(ax25, nr)) { - if (ax25_check_iframes_acked(ax25, nr)) - ax25->n2count=0; - if (type == AX25_COMMAND && pf) - ax25_ds_enquiry_response(ax25); - } else { - ax25_ds_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_REJ: - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - - if (ax25_validate_nr(ax25, nr)) { - if (ax25->va != nr) - ax25->n2count=0; - - ax25_frames_acked(ax25, nr); - ax25_calculate_rtt(ax25); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - ax25_requeue_frames(ax25); - - if (type == AX25_COMMAND && pf) - ax25_ds_enquiry_response(ax25); - } else { - ax25_ds_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_I: - if (!ax25_validate_nr(ax25, nr)) { - ax25_ds_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - break; - } - if (ax25->condition & AX25_COND_PEER_RX_BUSY) { - ax25_frames_acked(ax25, nr); - ax25->n2count = 0; - } else { - if (ax25_check_iframes_acked(ax25, nr)) - ax25->n2count = 0; - } - if (ax25->condition & AX25_COND_OWN_RX_BUSY) { - if (pf) ax25_ds_enquiry_response(ax25); - break; - } - if (ns == ax25->vr) { - ax25->vr = (ax25->vr + 1) % ax25->modulus; - queued = ax25_rx_iframe(ax25, skb); - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25->vr = ns; /* ax25->vr - 1 */ - ax25->condition &= ~AX25_COND_REJECT; - if (pf) { - ax25_ds_enquiry_response(ax25); - } else { - if (!(ax25->condition & AX25_COND_ACK_PENDING)) { - ax25->condition |= AX25_COND_ACK_PENDING; - ax25_start_t2timer(ax25); - } - } - } else { - if (ax25->condition & AX25_COND_REJECT) { - if (pf) ax25_ds_enquiry_response(ax25); - } else { - ax25->condition |= AX25_COND_REJECT; - ax25_ds_enquiry_response(ax25); - ax25->condition &= ~AX25_COND_ACK_PENDING; - } - } - break; - - case AX25_FRMR: - case AX25_ILLEGAL: - ax25_ds_establish_data_link(ax25); - ax25->state = AX25_STATE_1; - break; - - default: - break; - } - - return queued; -} - -/* - * Higher level 
upcall for a LAPB frame - */ -int ax25_ds_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type) -{ - int queued = 0, frametype, ns, nr, pf; - - frametype = ax25_decode(ax25, skb, &ns, &nr, &pf); - - switch (ax25->state) { - case AX25_STATE_1: - queued = ax25_ds_state1_machine(ax25, skb, frametype, pf, type); - break; - case AX25_STATE_2: - queued = ax25_ds_state2_machine(ax25, skb, frametype, pf, type); - break; - case AX25_STATE_3: - queued = ax25_ds_state3_machine(ax25, skb, frametype, ns, nr, pf, type); - break; - } - - return queued; -} diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c deleted file mode 100644 index f00e27df3c76..000000000000 --- a/net/ax25/ax25_ds_subr.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/spinlock.h> -#include <linux/net.h> -#include <linux/gfp.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -void ax25_ds_nr_error_recovery(ax25_cb *ax25) -{ - ax25_ds_establish_data_link(ax25); -} - -/* - * dl1bke 960114: transmit I frames on DAMA poll - */ -void ax25_ds_enquiry_response(ax25_cb *ax25) -{ - ax25_cb *ax25o; - - /* Please note that neither DK4EG's nor DG2FEF's - * DAMA spec mention the following behaviour as seen - * with TheFirmware: - * - * DB0ACH->DL1BKE <RR C P R0> [DAMA] - * DL1BKE->DB0ACH <I NR=0 NS=0> - * DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5> - * DL1BKE->DB0ACH <RR R F R0> - * - * The Flexnet DAMA Master implementation apparently - * 
insists on the "proper" AX.25 behaviour: - * - * DB0ACH->DL1BKE <RR C P R0> [DAMA] - * DL1BKE->DB0ACH <RR R F R0> - * DL1BKE->DB0ACH <I NR=0 NS=0> - * DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5> - * - * Flexnet refuses to send us *any* I frame if we send - * a REJ in case AX25_COND_REJECT is set. It is superfluous in - * this mode anyway (a RR or RNR invokes the retransmission). - * Is this a Flexnet bug? - */ - - ax25_std_enquiry_response(ax25); - - if (!(ax25->condition & AX25_COND_PEER_RX_BUSY)) { - ax25_requeue_frames(ax25); - ax25_kick(ax25); - } - - if (ax25->state == AX25_STATE_1 || ax25->state == AX25_STATE_2 || skb_peek(&ax25->ack_queue) != NULL) - ax25_ds_t1_timeout(ax25); - else - ax25->n2count = 0; - - ax25_start_t3timer(ax25); - ax25_ds_set_timer(ax25->ax25_dev); - - spin_lock(&ax25_list_lock); - ax25_for_each(ax25o, &ax25_list) { - if (ax25o == ax25) - continue; - - if (ax25o->ax25_dev != ax25->ax25_dev) - continue; - - if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2) { - ax25_ds_t1_timeout(ax25o); - continue; - } - - if (!(ax25o->condition & AX25_COND_PEER_RX_BUSY) && ax25o->state == AX25_STATE_3) { - ax25_requeue_frames(ax25o); - ax25_kick(ax25o); - } - - if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2 || skb_peek(&ax25o->ack_queue) != NULL) - ax25_ds_t1_timeout(ax25o); - - /* do not start T3 for listening sockets (tnx DD8NE) */ - - if (ax25o->state != AX25_STATE_0) - ax25_start_t3timer(ax25o); - } - spin_unlock(&ax25_list_lock); -} - -void ax25_ds_establish_data_link(ax25_cb *ax25) -{ - ax25->condition &= AX25_COND_DAMA_MODE; - ax25->n2count = 0; - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_start_t3timer(ax25); -} - -/* - * :::FIXME::: - * This is a kludge. Not all drivers recognize kiss commands. - * We need a driver level request to switch duplex mode, that does - * either SCC changing, PI config or KISS as required. Currently - * this request isn't reliable. 
- */ -static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char param) -{ - struct sk_buff *skb; - unsigned char *p; - - if (ax25_dev->dev == NULL) - return; - - if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL) - return; - - skb_reset_network_header(skb); - p = skb_put(skb, 2); - - *p++ = cmd; - *p++ = param; - - skb->protocol = ax25_type_trans(skb, ax25_dev->dev); - - dev_queue_xmit(skb); -} - -/* - * A nasty problem arises if we count the number of DAMA connections - * wrong, especially when connections on the device already existed - * and our network node (or the sysop) decides to turn on DAMA Master - * mode. We thus flag the 'real' slave connections with - * ax25->dama_slave=1 and look on every disconnect if still slave - * connections exist. - */ -static int ax25_check_dama_slave(ax25_dev *ax25_dev) -{ - ax25_cb *ax25; - int res = 0; - - spin_lock(&ax25_list_lock); - ax25_for_each(ax25, &ax25_list) - if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { - res = 1; - break; - } - spin_unlock(&ax25_list_lock); - - return res; -} - -static void ax25_dev_dama_on(ax25_dev *ax25_dev) -{ - if (ax25_dev == NULL) - return; - - if (ax25_dev->dama.slave == 0) - ax25_kiss_cmd(ax25_dev, 5, 1); - - ax25_dev->dama.slave = 1; - ax25_ds_set_timer(ax25_dev); -} - -void ax25_dev_dama_off(ax25_dev *ax25_dev) -{ - if (ax25_dev == NULL) - return; - - if (ax25_dev->dama.slave && !ax25_check_dama_slave(ax25_dev)) { - ax25_kiss_cmd(ax25_dev, 5, 0); - ax25_dev->dama.slave = 0; - ax25_ds_del_timer(ax25_dev); - } -} - -void ax25_dama_on(ax25_cb *ax25) -{ - ax25_dev_dama_on(ax25->ax25_dev); - ax25->condition |= AX25_COND_DAMA_MODE; -} - -void ax25_dama_off(ax25_cb *ax25) -{ - ax25->condition &= ~AX25_COND_DAMA_MODE; - ax25_dev_dama_off(ax25->ax25_dev); -} diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c deleted file mode 100644 index 0c9e7775aa54..000000000000 --- a/net/ax25/ax25_ds_timer.c +++ 
/dev/null @@ -1,235 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/spinlock.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/tcp_states.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -static void ax25_ds_timeout(struct timer_list *); - -/* - * Add DAMA slave timeout timer to timer list. - * Unlike the connection based timers the timeout function gets - * triggered every second. Please note that NET_AX25_DAMA_SLAVE_TIMEOUT - * (aka /proc/sys/net/ax25/{dev}/dama_slave_timeout) is still in - * 1/10th of a second. - */ - -void ax25_ds_setup_timer(ax25_dev *ax25_dev) -{ - timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0); -} - -void ax25_ds_del_timer(ax25_dev *ax25_dev) -{ - if (ax25_dev) - timer_delete(&ax25_dev->dama.slave_timer); -} - -void ax25_ds_set_timer(ax25_dev *ax25_dev) -{ - if (ax25_dev == NULL) /* paranoia */ - return; - - ax25_dev->dama.slave_timeout = - msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10; - mod_timer(&ax25_dev->dama.slave_timer, jiffies + HZ); -} - -/* - * DAMA Slave Timeout - * Silently discard all (slave) connections in case our master forgot us... - */ - -static void ax25_ds_timeout(struct timer_list *t) -{ - ax25_dev *ax25_dev = timer_container_of(ax25_dev, t, dama.slave_timer); - ax25_cb *ax25; - - if (ax25_dev == NULL || !ax25_dev->dama.slave) - return; /* Yikes! 
*/ - - if (!ax25_dev->dama.slave_timeout || --ax25_dev->dama.slave_timeout) { - ax25_ds_set_timer(ax25_dev); - return; - } - - spin_lock(&ax25_list_lock); - ax25_for_each(ax25, &ax25_list) { - if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) - continue; - - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); - } - spin_unlock(&ax25_list_lock); - - ax25_dev_dama_off(ax25_dev); -} - -void ax25_ds_heartbeat_expiry(ax25_cb *ax25) -{ - struct sock *sk=ax25->sk; - - if (sk) - bh_lock_sock(sk); - - switch (ax25->state) { - - case AX25_STATE_0: - case AX25_STATE_2: - /* Magic here: If we listen() and a new link dies before it - is accepted() it isn't 'dead' so doesn't get removed. */ - if (!sk || sock_flag(sk, SOCK_DESTROY) || - (sk->sk_state == TCP_LISTEN && - sock_flag(sk, SOCK_DEAD))) { - if (sk) { - sock_hold(sk); - ax25_destroy_socket(ax25); - bh_unlock_sock(sk); - /* Ungrab socket and destroy it */ - sock_put(sk); - } else - ax25_destroy_socket(ax25); - return; - } - break; - - case AX25_STATE_3: - /* - * Check the state of the receive buffer. - */ - if (sk != NULL) { - if (atomic_read(&sk->sk_rmem_alloc) < - (sk->sk_rcvbuf >> 1) && - (ax25->condition & AX25_COND_OWN_RX_BUSY)) { - ax25->condition &= ~AX25_COND_OWN_RX_BUSY; - ax25->condition &= ~AX25_COND_ACK_PENDING; - break; - } - } - break; - } - - if (sk) - bh_unlock_sock(sk); - - ax25_start_heartbeat(ax25); -} - -/* dl1bke 960114: T3 works much like the IDLE timeout, but - * gets reloaded with every frame for this - * connection. - */ -void ax25_ds_t3timer_expiry(ax25_cb *ax25) -{ - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_dama_off(ax25); - ax25_disconnect(ax25, ETIMEDOUT); -} - -/* dl1bke 960228: close the connection when IDLE expires. - * unlike T3 this timer gets reloaded only on - * I frames. 
- */ -void ax25_ds_idletimer_expiry(ax25_cb *ax25) -{ - ax25_clear_queues(ax25); - - ax25->n2count = 0; - ax25->state = AX25_STATE_2; - - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - ax25_stop_t3timer(ax25); - - if (ax25->sk != NULL) { - bh_lock_sock(ax25->sk); - ax25->sk->sk_state = TCP_CLOSE; - ax25->sk->sk_err = 0; - ax25->sk->sk_shutdown |= SEND_SHUTDOWN; - if (!sock_flag(ax25->sk, SOCK_DEAD)) { - ax25->sk->sk_state_change(ax25->sk); - sock_set_flag(ax25->sk, SOCK_DEAD); - } - bh_unlock_sock(ax25->sk); - } -} - -/* dl1bke 960114: The DAMA protocol requires to send data and SABM/DISC - * within the poll of any connected channel. Remember - * that we are not allowed to send anything unless we - * get polled by the Master. - * - * Thus we'll have to do parts of our T1 handling in - * ax25_enquiry_response(). - */ -void ax25_ds_t1_timeout(ax25_cb *ax25) -{ - switch (ax25->state) { - case AX25_STATE_1: - if (ax25->n2count == ax25->n2) { - if (ax25->modulus == AX25_MODULUS) { - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - ax25->n2count = 0; - ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND); - } - } else { - ax25->n2count++; - if (ax25->modulus == AX25_MODULUS) - ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND); - else - ax25_send_control(ax25, AX25_SABME, AX25_POLLOFF, AX25_COMMAND); - } - break; - - case AX25_STATE_2: - if (ax25->n2count == ax25->n2) { - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->n2count++; - } - break; - - case AX25_STATE_3: - if (ax25->n2count == ax25->n2) { - ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE); - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->n2count++; - } - break; - } - - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); -} diff --git 
a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c deleted file mode 100644 index 3ad454416a5c..000000000000 --- a/net/ax25/ax25_iface.c +++ /dev/null @@ -1,214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -static struct ax25_protocol *protocol_list; -static DEFINE_RWLOCK(protocol_list_lock); - -static HLIST_HEAD(ax25_linkfail_list); -static DEFINE_SPINLOCK(linkfail_lock); - -static struct listen_struct { - struct listen_struct *next; - ax25_address callsign; - struct net_device *dev; -} *listen_list = NULL; -static DEFINE_SPINLOCK(listen_lock); - -/* - * Do not register the internal protocols AX25_P_TEXT, AX25_P_SEGMENT, - * AX25_P_IP or AX25_P_ARP ... 
- */ -void ax25_register_pid(struct ax25_protocol *ap) -{ - write_lock_bh(&protocol_list_lock); - ap->next = protocol_list; - protocol_list = ap; - write_unlock_bh(&protocol_list_lock); -} - -EXPORT_SYMBOL_GPL(ax25_register_pid); - -void ax25_protocol_release(unsigned int pid) -{ - struct ax25_protocol *protocol; - - write_lock_bh(&protocol_list_lock); - protocol = protocol_list; - if (protocol == NULL) - goto out; - - if (protocol->pid == pid) { - protocol_list = protocol->next; - goto out; - } - - while (protocol != NULL && protocol->next != NULL) { - if (protocol->next->pid == pid) { - protocol->next = protocol->next->next; - goto out; - } - - protocol = protocol->next; - } -out: - write_unlock_bh(&protocol_list_lock); -} - -EXPORT_SYMBOL(ax25_protocol_release); - -void ax25_linkfail_register(struct ax25_linkfail *lf) -{ - spin_lock_bh(&linkfail_lock); - hlist_add_head(&lf->lf_node, &ax25_linkfail_list); - spin_unlock_bh(&linkfail_lock); -} - -EXPORT_SYMBOL(ax25_linkfail_register); - -void ax25_linkfail_release(struct ax25_linkfail *lf) -{ - spin_lock_bh(&linkfail_lock); - hlist_del_init(&lf->lf_node); - spin_unlock_bh(&linkfail_lock); -} - -EXPORT_SYMBOL(ax25_linkfail_release); - -int ax25_listen_register(const ax25_address *callsign, struct net_device *dev) -{ - struct listen_struct *listen; - - if (ax25_listen_mine(callsign, dev)) - return 0; - - if ((listen = kmalloc_obj(*listen, GFP_ATOMIC)) == NULL) - return -ENOMEM; - - listen->callsign = *callsign; - listen->dev = dev; - - spin_lock_bh(&listen_lock); - listen->next = listen_list; - listen_list = listen; - spin_unlock_bh(&listen_lock); - - return 0; -} - -EXPORT_SYMBOL(ax25_listen_register); - -void ax25_listen_release(const ax25_address *callsign, struct net_device *dev) -{ - struct listen_struct *s, *listen; - - spin_lock_bh(&listen_lock); - listen = listen_list; - if (listen == NULL) { - spin_unlock_bh(&listen_lock); - return; - } - - if (ax25cmp(&listen->callsign, callsign) == 0 && listen->dev == dev) 
{ - listen_list = listen->next; - spin_unlock_bh(&listen_lock); - kfree(listen); - return; - } - - while (listen != NULL && listen->next != NULL) { - if (ax25cmp(&listen->next->callsign, callsign) == 0 && listen->next->dev == dev) { - s = listen->next; - listen->next = listen->next->next; - spin_unlock_bh(&listen_lock); - kfree(s); - return; - } - - listen = listen->next; - } - spin_unlock_bh(&listen_lock); -} - -EXPORT_SYMBOL(ax25_listen_release); - -int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) -{ - int (*res)(struct sk_buff *, ax25_cb *) = NULL; - struct ax25_protocol *protocol; - - read_lock(&protocol_list_lock); - for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) - if (protocol->pid == pid) { - res = protocol->func; - break; - } - read_unlock(&protocol_list_lock); - - return res; -} - -int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev) -{ - struct listen_struct *listen; - - spin_lock_bh(&listen_lock); - for (listen = listen_list; listen != NULL; listen = listen->next) - if (ax25cmp(&listen->callsign, callsign) == 0 && - (listen->dev == dev || listen->dev == NULL)) { - spin_unlock_bh(&listen_lock); - return 1; - } - spin_unlock_bh(&listen_lock); - - return 0; -} - -void ax25_link_failed(ax25_cb *ax25, int reason) -{ - struct ax25_linkfail *lf; - - spin_lock_bh(&linkfail_lock); - hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node) - lf->func(ax25, reason); - spin_unlock_bh(&linkfail_lock); -} - -int ax25_protocol_is_registered(unsigned int pid) -{ - struct ax25_protocol *protocol; - int res = 0; - - read_lock_bh(&protocol_list_lock); - for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) - if (protocol->pid == pid) { - res = 1; - break; - } - read_unlock_bh(&protocol_list_lock); - - return res; -} diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c deleted file mode 100644 index d75b3e9ed93d..000000000000 --- a/net/ax25/ax25_in.c +++ /dev/null @@ -1,455 
+0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -/* - * Given a fragment, queue it on the fragment queue and if the fragment - * is complete, send it back to ax25_rx_iframe. - */ -static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb) -{ - struct sk_buff *skbn, *skbo; - - if (ax25->fragno != 0) { - if (!(*skb->data & AX25_SEG_FIRST)) { - if ((ax25->fragno - 1) == (*skb->data & AX25_SEG_REM)) { - /* Enqueue fragment */ - ax25->fragno = *skb->data & AX25_SEG_REM; - skb_pull(skb, 1); /* skip fragno */ - ax25->fraglen += skb->len; - skb_queue_tail(&ax25->frag_queue, skb); - - /* Last fragment received ? 
*/ - if (ax25->fragno == 0) { - skbn = alloc_skb(AX25_MAX_HEADER_LEN + - ax25->fraglen, - GFP_ATOMIC); - if (!skbn) { - skb_queue_purge(&ax25->frag_queue); - return 1; - } - - skb_reserve(skbn, AX25_MAX_HEADER_LEN); - - skbn->dev = ax25->ax25_dev->dev; - skb_reset_network_header(skbn); - skb_reset_transport_header(skbn); - - /* Copy data from the fragments */ - while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) { - skb_copy_from_linear_data(skbo, - skb_put(skbn, skbo->len), - skbo->len); - kfree_skb(skbo); - } - - ax25->fraglen = 0; - - if (ax25_rx_iframe(ax25, skbn) == 0) - kfree_skb(skbn); - } - - return 1; - } - } - } else { - /* First fragment received */ - if (*skb->data & AX25_SEG_FIRST) { - skb_queue_purge(&ax25->frag_queue); - ax25->fragno = *skb->data & AX25_SEG_REM; - skb_pull(skb, 1); /* skip fragno */ - ax25->fraglen = skb->len; - skb_queue_tail(&ax25->frag_queue, skb); - return 1; - } - } - - return 0; -} - -/* - * This is where all valid I frames are sent to, to be dispatched to - * whichever protocol requires them. - */ -int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) -{ - int (*func)(struct sk_buff *, ax25_cb *); - unsigned char pid; - int queued = 0; - - if (skb == NULL) return 0; - - ax25_start_idletimer(ax25); - - pid = *skb->data; - - if (pid == AX25_P_IP) { - /* working around a TCP bug to keep additional listeners - * happy. TCP re-uses the buffer and destroys the original - * content. 
- */ - struct sk_buff *skbn = skb_copy(skb, GFP_ATOMIC); - if (skbn != NULL) { - kfree_skb(skb); - skb = skbn; - } - - skb_pull(skb, 1); /* Remove PID */ - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->dev = ax25->ax25_dev->dev; - skb->pkt_type = PACKET_HOST; - skb->protocol = htons(ETH_P_IP); - netif_rx(skb); - return 1; - } - if (pid == AX25_P_SEGMENT) { - skb_pull(skb, 1); /* Remove PID */ - return ax25_rx_fragment(ax25, skb); - } - - if ((func = ax25_protocol_function(pid)) != NULL) { - skb_pull(skb, 1); /* Remove PID */ - return (*func)(skb, ax25); - } - - if (ax25->sk != NULL && ax25->ax25_dev->values[AX25_VALUES_CONMODE] == 2) { - if ((!ax25->pidincl && ax25->sk->sk_protocol == pid) || - ax25->pidincl) { - if (sock_queue_rcv_skb(ax25->sk, skb) == 0) - queued = 1; - else - ax25->condition |= AX25_COND_OWN_RX_BUSY; - } - } - - return queued; -} - -/* - * Higher level upcall for a LAPB frame - */ -static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, int dama) -{ - int queued = 0; - - if (ax25->state == AX25_STATE_0) - return 0; - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - queued = ax25_std_frame_in(ax25, skb, type); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (dama || ax25->ax25_dev->dama.slave) - queued = ax25_ds_frame_in(ax25, skb, type); - else - queued = ax25_std_frame_in(ax25, skb, type); - break; -#endif - } - - return queued; -} - -static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, - const ax25_address *dev_addr, struct packet_type *ptype) -{ - ax25_address src, dest, *next_digi = NULL; - int type = 0, mine = 0, dama; - struct sock *make, *sk; - ax25_digi dp, reverse_dp; - ax25_cb *ax25; - ax25_dev *ax25_dev; - - /* - * Process the AX.25/LAPB frame. 
- */ - - skb_reset_transport_header(skb); - - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) - goto free; - - /* - * Parse the address header. - */ - - if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL) - goto free; - - /* - * Ours perhaps ? - */ - if (dp.lastrepeat + 1 < dp.ndigi) /* Not yet digipeated completely */ - next_digi = &dp.calls[dp.lastrepeat + 1]; - - /* - * Pull of the AX.25 headers leaving the CTRL/PID bytes - */ - skb_pull(skb, ax25_addr_size(&dp)); - - /* For our port addresses ? */ - if (ax25cmp(&dest, dev_addr) == 0 && dp.lastrepeat + 1 == dp.ndigi) - mine = 1; - - /* Also match on any registered callsign from L3/4 */ - if (!mine && ax25_listen_mine(&dest, dev) && dp.lastrepeat + 1 == dp.ndigi) - mine = 1; - - /* UI frame - bypass LAPB processing */ - if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) { - skb_set_transport_header(skb, 2); /* skip control and pid */ - - ax25_send_to_raw(&dest, skb, skb->data[1]); - - if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) - goto free; - - /* Now we are pointing at the pid byte */ - switch (skb->data[1]) { - case AX25_P_IP: - skb_pull(skb,2); /* drop PID/CTRL */ - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->dev = dev; - skb->pkt_type = PACKET_HOST; - skb->protocol = htons(ETH_P_IP); - netif_rx(skb); - break; - - case AX25_P_ARP: - skb_pull(skb,2); - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->dev = dev; - skb->pkt_type = PACKET_HOST; - skb->protocol = htons(ETH_P_ARP); - netif_rx(skb); - break; - case AX25_P_TEXT: - /* Now find a suitable dgram socket */ - sk = ax25_get_socket(&dest, &src, SOCK_DGRAM); - if (sk != NULL) { - bh_lock_sock(sk); - if (atomic_read(&sk->sk_rmem_alloc) >= - sk->sk_rcvbuf) { - kfree_skb(skb); - } else { - /* - * Remove the control and PID. 
- */ - skb_pull(skb, 2); - if (sock_queue_rcv_skb(sk, skb) != 0) - kfree_skb(skb); - } - bh_unlock_sock(sk); - sock_put(sk); - } else { - kfree_skb(skb); - } - break; - - default: - kfree_skb(skb); /* Will scan SOCK_AX25 RAW sockets */ - break; - } - - return 0; - } - - /* - * Is connected mode supported on this device ? - * If not, should we DM the incoming frame (except DMs) or - * silently ignore them. For now we stay quiet. - */ - if (ax25_dev->values[AX25_VALUES_CONMODE] == 0) - goto free; - - /* LAPB */ - - /* AX.25 state 1-4 */ - - ax25_digi_invert(&dp, &reverse_dp); - - if ((ax25 = ax25_find_cb(&dest, &src, &reverse_dp, dev)) != NULL) { - /* - * Process the frame. If it is queued up internally it - * returns one otherwise we free it immediately. This - * routine itself wakes the user context layers so we do - * no further work - */ - if (ax25_process_rx_frame(ax25, skb, type, dama) == 0) - kfree_skb(skb); - - ax25_cb_put(ax25); - return 0; - } - - /* AX.25 state 0 (disconnected) */ - - /* a) received not a SABM(E) */ - - if ((*skb->data & ~AX25_PF) != AX25_SABM && - (*skb->data & ~AX25_PF) != AX25_SABME) { - /* - * Never reply to a DM. Also ignore any connects for - * addresses that are not our interfaces and not a socket. 
- */ - if ((*skb->data & ~AX25_PF) != AX25_DM && mine) - ax25_return_dm(dev, &src, &dest, &dp); - - goto free; - } - - /* b) received SABM(E) */ - - if (dp.lastrepeat + 1 == dp.ndigi) - sk = ax25_find_listener(&dest, 0, dev, SOCK_SEQPACKET); - else - sk = ax25_find_listener(next_digi, 1, dev, SOCK_SEQPACKET); - - if (sk != NULL) { - bh_lock_sock(sk); - if (sk_acceptq_is_full(sk) || - (make = ax25_make_new(sk, ax25_dev)) == NULL) { - if (mine) - ax25_return_dm(dev, &src, &dest, &dp); - kfree_skb(skb); - bh_unlock_sock(sk); - sock_put(sk); - - return 0; - } - - ax25 = sk_to_ax25(make); - skb_set_owner_r(skb, make); - skb_queue_head(&sk->sk_receive_queue, skb); - - make->sk_state = TCP_ESTABLISHED; - - sk_acceptq_added(sk); - bh_unlock_sock(sk); - } else { - if (!mine) - goto free; - - if ((ax25 = ax25_create_cb()) == NULL) { - ax25_return_dm(dev, &src, &dest, &dp); - goto free; - } - - ax25_fillin_cb(ax25, ax25_dev); - } - - ax25->source_addr = dest; - ax25->dest_addr = src; - - /* - * Sort out any digipeated paths. 
- */ - if (dp.ndigi && !ax25->digipeat && - (ax25->digipeat = kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - ax25_destroy_socket(ax25); - if (sk) - sock_put(sk); - return 0; - } - - if (dp.ndigi == 0) { - kfree(ax25->digipeat); - ax25->digipeat = NULL; - } else { - /* Reverse the source SABM's path */ - memcpy(ax25->digipeat, &reverse_dp, sizeof(ax25_digi)); - } - - if ((*skb->data & ~AX25_PF) == AX25_SABME) { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_EWINDOW]; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25_dev->values[AX25_VALUES_WINDOW]; - } - - ax25_send_control(ax25, AX25_UA, AX25_POLLON, AX25_RESPONSE); - -#ifdef CONFIG_AX25_DAMA_SLAVE - if (dama && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE) - ax25_dama_on(ax25); -#endif - - ax25->state = AX25_STATE_3; - - ax25_cb_add(ax25); - - ax25_start_heartbeat(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - - if (sk) { - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); - sock_put(sk); - } else { -free: - kfree_skb(skb); - } - return 0; -} - -/* - * Receive an AX.25 frame via a SLIP interface. 
- */ -int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - skb_orphan(skb); - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(skb); - return 0; - } - - if ((*skb->data & 0x0F) != 0) { - kfree_skb(skb); /* Not a KISS data frame */ - return 0; - } - - skb_pull(skb, AX25_KISS_HEADER_LEN); /* Remove the KISS byte */ - - return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype); -} diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c deleted file mode 100644 index 215d4ccf12b9..000000000000 --- a/net/ax25/ax25_ip.c +++ /dev/null @@ -1,247 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/sysctl.h> -#include <net/ip.h> -#include <net/arp.h> - -/* - * IP over AX.25 encapsulation. - */ - -/* - * Shove an AX.25 UI header on an IP packet and handle ARP - */ - -#ifdef CONFIG_INET - -static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) -{ - unsigned char *buff; - - /* they sometimes come back to us... 
*/ - if (type == ETH_P_AX25) - return 0; - - /* header is an AX.25 UI frame from us to them */ - buff = skb_push(skb, AX25_HEADER_LEN); - *buff++ = 0x00; /* KISS DATA */ - - if (daddr != NULL) - memcpy(buff, daddr, dev->addr_len); /* Address specified */ - - buff[6] &= ~AX25_CBIT; - buff[6] &= ~AX25_EBIT; - buff[6] |= AX25_SSSID_SPARE; - buff += AX25_ADDR_LEN; - - if (saddr != NULL) - memcpy(buff, saddr, dev->addr_len); - else - memcpy(buff, dev->dev_addr, dev->addr_len); - - buff[6] &= ~AX25_CBIT; - buff[6] |= AX25_EBIT; - buff[6] |= AX25_SSSID_SPARE; - buff += AX25_ADDR_LEN; - - *buff++ = AX25_UI; /* UI */ - - /* Append a suitable AX.25 PID */ - switch (type) { - case ETH_P_IP: - *buff++ = AX25_P_IP; - break; - case ETH_P_ARP: - *buff++ = AX25_P_ARP; - break; - default: - printk(KERN_ERR "AX.25: ax25_hard_header - wrong protocol type 0x%2.2x\n", type); - *buff++ = 0; - break; - } - - if (daddr != NULL) - return AX25_HEADER_LEN; - - return -AX25_HEADER_LEN; /* Unfinished header */ -} - -netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) -{ - struct sk_buff *ourskb; - unsigned char *bp = skb->data; - ax25_route *route; - struct net_device *dev = NULL; - ax25_address *src, *dst; - ax25_digi *digipeat = NULL; - ax25_dev *ax25_dev; - ax25_cb *ax25; - char ip_mode = ' '; - - dst = (ax25_address *)(bp + 1); - src = (ax25_address *)(bp + 8); - - ax25_route_lock_use(); - route = ax25_get_route(dst, NULL); - if (route) { - digipeat = route->digipeat; - dev = route->dev; - ip_mode = route->ip_mode; - } - - if (dev == NULL) - dev = skb->dev; - - rcu_read_lock(); - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { - kfree_skb(skb); - goto put; - } - - if (bp[16] == AX25_P_IP) { - if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) { - /* - * We copy the buffer and release the original thereby - * keeping it straight - * - * Note: we report 1 back so the caller will - * not feed the frame direct to the physical device - * We don't want that to happen. 
(It won't be upset - * as we have pulled the frame from the queue by - * freeing it). - * - * NB: TCP modifies buffers that are still - * on a device queue, thus we use skb_copy() - * instead of using skb_clone() unless this - * gets fixed. - */ - - ax25_address src_c; - ax25_address dst_c; - - if ((ourskb = skb_copy(skb, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - goto put; - } - - if (skb->sk != NULL) - skb_set_owner_w(ourskb, skb->sk); - - kfree_skb(skb); - /* dl9sau: bugfix - * after kfree_skb(), dst and src which were pointer - * to bp which is part of skb->data would not be valid - * anymore hope that after skb_pull(ourskb, ..) our - * dsc_c and src_c will not become invalid - */ - bp = ourskb->data; - dst_c = *(ax25_address *)(bp + 1); - src_c = *(ax25_address *)(bp + 8); - - skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */ - skb_reset_network_header(ourskb); - - ax25=ax25_send_frame( - ourskb, - ax25_dev->values[AX25_VALUES_PACLEN], - &src_c, - &dst_c, digipeat, dev); - if (ax25) { - ax25_cb_put(ax25); - } - goto put; - } - } - - bp[7] &= ~AX25_CBIT; - bp[7] &= ~AX25_EBIT; - bp[7] |= AX25_SSSID_SPARE; - - bp[14] &= ~AX25_CBIT; - bp[14] |= AX25_EBIT; - bp[14] |= AX25_SSSID_SPARE; - - skb_pull(skb, AX25_KISS_HEADER_LEN); - - if (digipeat != NULL) { - if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) - goto put; - - skb = ourskb; - } - - ax25_queue_xmit(skb, dev); - -put: - rcu_read_unlock(); - ax25_route_lock_unuse(); - return NETDEV_TX_OK; -} - -#else /* INET */ - -static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) -{ - return -AX25_HEADER_LEN; -} - -netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) -{ - kfree_skb(skb); - return NETDEV_TX_OK; -} -#endif - -static bool ax25_validate_header(const char *header, unsigned int len) -{ - ax25_digi digi; - - if (!len) - return false; - - if (header[0]) - return true; - - return 
ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL, - NULL); -} - -const struct header_ops ax25_header_ops = { - .create = ax25_hard_header, - .validate = ax25_validate_header, -}; - -EXPORT_SYMBOL(ax25_header_ops); -EXPORT_SYMBOL(ax25_ip_xmit); diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c deleted file mode 100644 index 8bca2ace98e5..000000000000 --- a/net/ax25/ax25_out.c +++ /dev/null @@ -1,398 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/spinlock.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -static DEFINE_SPINLOCK(ax25_frag_lock); - -ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) -{ - ax25_dev *ax25_dev; - ax25_cb *ax25; - - /* - * Take the default packet length for the device if zero is - * specified. - */ - if (paclen == 0) { - rcu_read_lock(); - ax25_dev = ax25_dev_ax25dev(dev); - if (!ax25_dev) { - rcu_read_unlock(); - return NULL; - } - paclen = ax25_dev->values[AX25_VALUES_PACLEN]; - rcu_read_unlock(); - } - - /* - * Look for an existing connection. 
- */ - if ((ax25 = ax25_find_cb(src, dest, digi, dev)) != NULL) { - ax25_output(ax25, paclen, skb); - return ax25; /* It already existed */ - } - - rcu_read_lock(); - ax25_dev = ax25_dev_ax25dev(dev); - if (!ax25_dev) { - rcu_read_unlock(); - return NULL; - } - - if ((ax25 = ax25_create_cb()) == NULL) { - rcu_read_unlock(); - return NULL; - } - ax25_fillin_cb(ax25, ax25_dev); - rcu_read_unlock(); - - ax25->source_addr = *src; - ax25->dest_addr = *dest; - - if (digi != NULL) { - ax25->digipeat = kmemdup(digi, sizeof(*digi), GFP_ATOMIC); - if (ax25->digipeat == NULL) { - ax25_cb_put(ax25); - return NULL; - } - } - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_establish_data_link(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (ax25_dev->dama.slave) - ax25_ds_establish_data_link(ax25); - else - ax25_std_establish_data_link(ax25); - break; -#endif - } - - /* - * There is one ref for the state machine; a caller needs - * one more to put it back, just like with the existing one. - */ - ax25_cb_hold(ax25); - - ax25_cb_add(ax25); - - ax25->state = AX25_STATE_1; - - ax25_start_heartbeat(ax25); - - ax25_output(ax25, paclen, skb); - - return ax25; /* We had to create it */ -} - -EXPORT_SYMBOL(ax25_send_frame); - -/* - * All outgoing AX.25 I frames pass via this routine. Therefore this is - * where the fragmentation of frames takes place. If fragment is set to - * zero then we are not allowed to do fragmentation, even if the frame - * is too large. 
- */ -void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb) -{ - struct sk_buff *skbn; - unsigned char *p; - int frontlen, len, fragno, ka9qfrag, first = 1; - - if (paclen < 16) { - WARN_ON_ONCE(1); - kfree_skb(skb); - return; - } - - if ((skb->len - 1) > paclen) { - if (*skb->data == AX25_P_TEXT) { - skb_pull(skb, 1); /* skip PID */ - ka9qfrag = 0; - } else { - paclen -= 2; /* Allow for fragment control info */ - ka9qfrag = 1; - } - - fragno = skb->len / paclen; - if (skb->len % paclen == 0) fragno--; - - frontlen = skb_headroom(skb); /* Address space + CTRL */ - - while (skb->len > 0) { - spin_lock_bh(&ax25_frag_lock); - if ((skbn = alloc_skb(paclen + 2 + frontlen, GFP_ATOMIC)) == NULL) { - spin_unlock_bh(&ax25_frag_lock); - printk(KERN_CRIT "AX.25: ax25_output - out of memory\n"); - return; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - spin_unlock_bh(&ax25_frag_lock); - - len = (paclen > skb->len) ? skb->len : paclen; - - if (ka9qfrag == 1) { - skb_reserve(skbn, frontlen + 2); - skb_set_network_header(skbn, - skb_network_offset(skb)); - skb_copy_from_linear_data(skb, skb_put(skbn, len), len); - p = skb_push(skbn, 2); - - *p++ = AX25_P_SEGMENT; - - *p = fragno--; - if (first) { - *p |= AX25_SEG_FIRST; - first = 0; - } - } else { - skb_reserve(skbn, frontlen + 1); - skb_set_network_header(skbn, - skb_network_offset(skb)); - skb_copy_from_linear_data(skb, skb_put(skbn, len), len); - p = skb_push(skbn, 1); - *p = AX25_P_TEXT; - } - - skb_pull(skb, len); - skb_queue_tail(&ax25->write_queue, skbn); /* Throw it on the queue */ - } - - kfree_skb(skb); - } else { - skb_queue_tail(&ax25->write_queue, skb); /* Throw it on the queue */ - } - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_kick(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - /* - * A DAMA slave is _required_ to work as normal AX.25L2V2 - * if no DAMA master is available. 
- */ - case AX25_PROTO_DAMA_SLAVE: - if (!ax25->ax25_dev->dama.slave) ax25_kick(ax25); - break; -#endif - } -} - -/* - * This procedure is passed a buffer descriptor for an iframe. It builds - * the rest of the control part of the frame and then writes it out. - */ -static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit) -{ - unsigned char *frame; - - if (skb == NULL) - return; - - skb_reset_network_header(skb); - - if (ax25->modulus == AX25_MODULUS) { - frame = skb_push(skb, 1); - - *frame = AX25_I; - *frame |= (poll_bit) ? AX25_PF : 0; - *frame |= (ax25->vr << 5); - *frame |= (ax25->vs << 1); - } else { - frame = skb_push(skb, 2); - - frame[0] = AX25_I; - frame[0] |= (ax25->vs << 1); - frame[1] = (poll_bit) ? AX25_EPF : 0; - frame[1] |= (ax25->vr << 1); - } - - ax25_start_idletimer(ax25); - - ax25_transmit_buffer(ax25, skb, AX25_COMMAND); -} - -void ax25_kick(ax25_cb *ax25) -{ - struct sk_buff *skb, *skbn; - int last = 1; - unsigned short start, end, next; - - if (ax25->state != AX25_STATE_3 && ax25->state != AX25_STATE_4) - return; - - if (ax25->condition & AX25_COND_PEER_RX_BUSY) - return; - - if (skb_peek(&ax25->write_queue) == NULL) - return; - - start = (skb_peek(&ax25->ack_queue) == NULL) ? ax25->va : ax25->vs; - end = (ax25->va + ax25->window) % ax25->modulus; - - if (start == end) - return; - - /* - * Transmit data until either we're out of data to send or - * the window is full. Send a poll on the final I frame if - * the window is filled. - */ - - /* - * Dequeue the frame and copy it. - * Check for race with ax25_clear_queues(). - */ - skb = skb_dequeue(&ax25->write_queue); - if (!skb) - return; - - ax25->vs = start; - - do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - skb_queue_head(&ax25->write_queue, skb); - break; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - next = (ax25->vs + 1) % ax25->modulus; - last = (next == end); - - /* - * Transmit the frame copy. 
- * bke 960114: do not set the Poll bit on the last frame - * in DAMA mode. - */ - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_send_iframe(ax25, skbn, (last) ? AX25_POLLON : AX25_POLLOFF); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - ax25_send_iframe(ax25, skbn, AX25_POLLOFF); - break; -#endif - } - - ax25->vs = next; - - /* - * Requeue the original data frame. - */ - skb_queue_tail(&ax25->ack_queue, skb); - - } while (!last && (skb = skb_dequeue(&ax25->write_queue)) != NULL); - - ax25->condition &= ~AX25_COND_ACK_PENDING; - - if (!ax25_t1timer_running(ax25)) { - ax25_stop_t3timer(ax25); - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - } -} - -void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) -{ - unsigned char *ptr; - int headroom; - - if (ax25->ax25_dev == NULL) { - ax25_disconnect(ax25, ENETUNREACH); - return; - } - - headroom = ax25_addr_size(ax25->digipeat); - - if (unlikely(skb_headroom(skb) < headroom)) { - skb = skb_expand_head(skb, headroom); - if (!skb) { - printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n"); - return; - } - } - - ptr = skb_push(skb, headroom); - - ax25_addr_build(ptr, &ax25->source_addr, &ax25->dest_addr, ax25->digipeat, type, ax25->modulus); - - ax25_queue_xmit(skb, ax25->ax25_dev->dev); -} - -/* - * A small shim to dev_queue_xmit to add the KISS control byte, and do - * any packet forwarding in operation. 
- */ -void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev) -{ - unsigned char *ptr; - - rcu_read_lock(); - skb->protocol = ax25_type_trans(skb, ax25_fwd_dev(dev)); - rcu_read_unlock(); - - ptr = skb_push(skb, 1); - *ptr = 0x00; /* KISS */ - - dev_queue_xmit(skb); -} - -int ax25_check_iframes_acked(ax25_cb *ax25, unsigned short nr) -{ - if (ax25->vs == nr) { - ax25_frames_acked(ax25, nr); - ax25_calculate_rtt(ax25); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - return 1; - } else { - if (ax25->va != nr) { - ax25_frames_acked(ax25, nr); - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - return 1; - } - } - return 0; -} diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c deleted file mode 100644 index 1d5c59ccf142..000000000000 --- a/net/ax25/ax25_route.c +++ /dev/null @@ -1,416 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) - * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) - */ - -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/timer.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <linux/export.h> - -static ax25_route *ax25_route_list; 
-DEFINE_RWLOCK(ax25_route_lock); - -void ax25_rt_device_down(struct net_device *dev) -{ - ax25_route *s, *t, *ax25_rt; - - write_lock_bh(&ax25_route_lock); - ax25_rt = ax25_route_list; - while (ax25_rt != NULL) { - s = ax25_rt; - ax25_rt = ax25_rt->next; - - if (s->dev == dev) { - if (ax25_route_list == s) { - ax25_route_list = s->next; - kfree(s->digipeat); - kfree(s); - } else { - for (t = ax25_route_list; t != NULL; t = t->next) { - if (t->next == s) { - t->next = s->next; - kfree(s->digipeat); - kfree(s); - break; - } - } - } - } - } - write_unlock_bh(&ax25_route_lock); -} - -static int __must_check ax25_rt_add(struct ax25_routes_struct *route) -{ - ax25_route *ax25_rt; - ax25_dev *ax25_dev; - int i; - - if (route->digi_count > AX25_MAX_DIGIS) - return -EINVAL; - - ax25_dev = ax25_addr_ax25dev(&route->port_addr); - if (!ax25_dev) - return -EINVAL; - - write_lock_bh(&ax25_route_lock); - - ax25_rt = ax25_route_list; - while (ax25_rt != NULL) { - if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 && - ax25_rt->dev == ax25_dev->dev) { - kfree(ax25_rt->digipeat); - ax25_rt->digipeat = NULL; - if (route->digi_count != 0) { - if ((ax25_rt->digipeat = kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) { - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - return -ENOMEM; - } - ax25_rt->digipeat->lastrepeat = -1; - ax25_rt->digipeat->ndigi = route->digi_count; - for (i = 0; i < route->digi_count; i++) { - ax25_rt->digipeat->repeated[i] = 0; - ax25_rt->digipeat->calls[i] = route->digi_addr[i]; - } - } - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - return 0; - } - ax25_rt = ax25_rt->next; - } - - if ((ax25_rt = kmalloc_obj(ax25_route, GFP_ATOMIC)) == NULL) { - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - return -ENOMEM; - } - - ax25_rt->callsign = route->dest_addr; - ax25_rt->dev = ax25_dev->dev; - ax25_rt->digipeat = NULL; - ax25_rt->ip_mode = ' '; - if (route->digi_count != 0) { - if ((ax25_rt->digipeat = 
kmalloc_obj(ax25_digi, GFP_ATOMIC)) == NULL) { - write_unlock_bh(&ax25_route_lock); - kfree(ax25_rt); - ax25_dev_put(ax25_dev); - return -ENOMEM; - } - ax25_rt->digipeat->lastrepeat = -1; - ax25_rt->digipeat->ndigi = route->digi_count; - for (i = 0; i < route->digi_count; i++) { - ax25_rt->digipeat->repeated[i] = 0; - ax25_rt->digipeat->calls[i] = route->digi_addr[i]; - } - } - ax25_rt->next = ax25_route_list; - ax25_route_list = ax25_rt; - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - - return 0; -} - -void __ax25_put_route(ax25_route *ax25_rt) -{ - kfree(ax25_rt->digipeat); - kfree(ax25_rt); -} - -static int ax25_rt_del(struct ax25_routes_struct *route) -{ - ax25_route *s, *t, *ax25_rt; - ax25_dev *ax25_dev; - - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; - - write_lock_bh(&ax25_route_lock); - - ax25_rt = ax25_route_list; - while (ax25_rt != NULL) { - s = ax25_rt; - ax25_rt = ax25_rt->next; - if (s->dev == ax25_dev->dev && - ax25cmp(&route->dest_addr, &s->callsign) == 0) { - if (ax25_route_list == s) { - ax25_route_list = s->next; - __ax25_put_route(s); - } else { - for (t = ax25_route_list; t != NULL; t = t->next) { - if (t->next == s) { - t->next = s->next; - __ax25_put_route(s); - break; - } - } - } - } - } - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - - return 0; -} - -static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) -{ - ax25_route *ax25_rt; - ax25_dev *ax25_dev; - int err = 0; - - if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL) - return -EINVAL; - - write_lock_bh(&ax25_route_lock); - - ax25_rt = ax25_route_list; - while (ax25_rt != NULL) { - if (ax25_rt->dev == ax25_dev->dev && - ax25cmp(&rt_option->dest_addr, &ax25_rt->callsign) == 0) { - switch (rt_option->cmd) { - case AX25_SET_RT_IPMODE: - switch (rt_option->arg) { - case ' ': - case 'D': - case 'V': - ax25_rt->ip_mode = rt_option->arg; - break; - default: - err = -EINVAL; - goto out; - } - break; 
- default: - err = -EINVAL; - goto out; - } - } - ax25_rt = ax25_rt->next; - } - -out: - write_unlock_bh(&ax25_route_lock); - ax25_dev_put(ax25_dev); - return err; -} - -int ax25_rt_ioctl(unsigned int cmd, void __user *arg) -{ - struct ax25_route_opt_struct rt_option; - struct ax25_routes_struct route; - - switch (cmd) { - case SIOCADDRT: - if (copy_from_user(&route, arg, sizeof(route))) - return -EFAULT; - return ax25_rt_add(&route); - - case SIOCDELRT: - if (copy_from_user(&route, arg, sizeof(route))) - return -EFAULT; - return ax25_rt_del(&route); - - case SIOCAX25OPTRT: - if (copy_from_user(&rt_option, arg, sizeof(rt_option))) - return -EFAULT; - return ax25_rt_opt(&rt_option); - - default: - return -EINVAL; - } -} - -#ifdef CONFIG_PROC_FS - -static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ax25_route_lock) -{ - struct ax25_route *ax25_rt; - int i = 1; - - read_lock(&ax25_route_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) { - if (i == *pos) - return ax25_rt; - ++i; - } - - return NULL; -} - -static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return (v == SEQ_START_TOKEN) ? ax25_route_list : - ((struct ax25_route *) v)->next; -} - -static void ax25_rt_seq_stop(struct seq_file *seq, void *v) - __releases(ax25_route_lock) -{ - read_unlock(&ax25_route_lock); -} - -static int ax25_rt_seq_show(struct seq_file *seq, void *v) -{ - char buf[11]; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "callsign dev mode digipeaters\n"); - else { - struct ax25_route *ax25_rt = v; - const char *callsign; - int i; - - if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0) - callsign = "default"; - else - callsign = ax2asc(buf, &ax25_rt->callsign); - - seq_printf(seq, "%-9s %-4s", - callsign, - ax25_rt->dev ? 
ax25_rt->dev->name : "???"); - - switch (ax25_rt->ip_mode) { - case 'V': - seq_puts(seq, " vc"); - break; - case 'D': - seq_puts(seq, " dg"); - break; - default: - seq_puts(seq, " *"); - break; - } - - if (ax25_rt->digipeat != NULL) - for (i = 0; i < ax25_rt->digipeat->ndigi; i++) - seq_printf(seq, " %s", - ax2asc(buf, &ax25_rt->digipeat->calls[i])); - - seq_puts(seq, "\n"); - } - return 0; -} - -const struct seq_operations ax25_rt_seqops = { - .start = ax25_rt_seq_start, - .next = ax25_rt_seq_next, - .stop = ax25_rt_seq_stop, - .show = ax25_rt_seq_show, -}; -#endif - -/* - * Find AX.25 route - * - * Only routes with a reference count of zero can be destroyed. - * Must be called with ax25_route_lock read locked. - */ -ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) -{ - ax25_route *ax25_spe_rt = NULL; - ax25_route *ax25_def_rt = NULL; - ax25_route *ax25_rt; - - /* - * Bind to the physical interface we heard them on, or the default - * route if none is found; - */ - for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) { - if (dev == NULL) { - if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev != NULL) - ax25_spe_rt = ax25_rt; - if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev != NULL) - ax25_def_rt = ax25_rt; - } else { - if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev == dev) - ax25_spe_rt = ax25_rt; - if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev == dev) - ax25_def_rt = ax25_rt; - } - } - - ax25_rt = ax25_def_rt; - if (ax25_spe_rt != NULL) - ax25_rt = ax25_spe_rt; - - return ax25_rt; -} - - -struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, - ax25_address *dest, ax25_digi *digi) -{ - unsigned char *bp; - int len; - - len = digi->ndigi * AX25_ADDR_LEN; - - if (unlikely(skb_headroom(skb) < len)) { - skb = skb_expand_head(skb, len); - if (!skb) { - printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n"); - return NULL; - } 
- } - - bp = skb_push(skb, len); - - ax25_addr_build(bp, src, dest, digi, AX25_COMMAND, AX25_MODULUS); - - return skb; -} - -/* - * Free all memory associated with routing structures. - */ -void __exit ax25_rt_free(void) -{ - ax25_route *s, *ax25_rt = ax25_route_list; - - write_lock_bh(&ax25_route_lock); - while (ax25_rt != NULL) { - s = ax25_rt; - ax25_rt = ax25_rt->next; - - kfree(s->digipeat); - kfree(s); - } - write_unlock_bh(&ax25_route_lock); -} diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c deleted file mode 100644 index ba176196ae06..000000000000 --- a/net/ax25/ax25_std_in.c +++ /dev/null @@ -1,443 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) - * - * Most of this code is based on the SDL diagrams published in the 7th ARRL - * Computer Networking Conference papers. The diagrams have mistakes in them, - * but are mostly correct. Before you modify the code could you read the SDL - * diagrams as the code is not obvious and probably very easy to break. - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -/* - * State machine for state 1, Awaiting Connection State. - * The handling of the timer(s) is in file ax25_std_timer.c. - * Handling of state 0 and connection release is in ax25.c. 
- */ -static int ax25_std_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) -{ - switch (frametype) { - case AX25_SABM: - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - break; - - case AX25_SABME: - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE); - break; - - case AX25_UA: - if (pf) { - ax25_calculate_rtt(ax25); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - ax25->vs = 0; - ax25->va = 0; - ax25->vr = 0; - ax25->state = AX25_STATE_3; - ax25->n2count = 0; - if (ax25->sk != NULL) { - bh_lock_sock(ax25->sk); - ax25->sk->sk_state = TCP_ESTABLISHED; - /* For WAIT_SABM connections we will produce an accept ready socket here */ - if (!sock_flag(ax25->sk, SOCK_DEAD)) - ax25->sk->sk_state_change(ax25->sk); - bh_unlock_sock(ax25->sk); - } - } - break; - - case AX25_DM: - if (pf) { - if (ax25->modulus == AX25_MODULUS) { - ax25_disconnect(ax25, ECONNREFUSED); - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - } - } - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 2, Awaiting Release State. - * The handling of the timer(s) is in file ax25_std_timer.c - * Handling of state 0 and connection release is in ax25.c. 
- */ -static int ax25_std_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) -{ - switch (frametype) { - case AX25_SABM: - case AX25_SABME: - ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_disconnect(ax25, 0); - break; - - case AX25_DM: - case AX25_UA: - if (pf) - ax25_disconnect(ax25, 0); - break; - - case AX25_I: - case AX25_REJ: - case AX25_RNR: - case AX25_RR: - if (pf) ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE); - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 3, Connected State. - * The handling of the timer(s) is in file ax25_std_timer.c - * Handling of state 0 and connection release is in ax25.c. - */ -static int ax25_std_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type) -{ - int queued = 0; - - switch (frametype) { - case AX25_SABM: - case AX25_SABME: - if (frametype == AX25_SABM) { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - } else { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; - } - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - ax25->condition = 0x00; - ax25->vs = 0; - ax25->va = 0; - ax25->vr = 0; - ax25_requeue_frames(ax25); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_disconnect(ax25, 0); - break; - - case AX25_DM: - ax25_disconnect(ax25, ECONNRESET); - break; - - case AX25_RR: - case AX25_RNR: - if (frametype == AX25_RR) - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - else - ax25->condition |= AX25_COND_PEER_RX_BUSY; - if (type == AX25_COMMAND && pf) - ax25_std_enquiry_response(ax25); - if (ax25_validate_nr(ax25, nr)) { - ax25_check_iframes_acked(ax25, nr); 
- } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_REJ: - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - if (type == AX25_COMMAND && pf) - ax25_std_enquiry_response(ax25); - if (ax25_validate_nr(ax25, nr)) { - ax25_frames_acked(ax25, nr); - ax25_calculate_rtt(ax25); - ax25_stop_t1timer(ax25); - ax25_start_t3timer(ax25); - ax25_requeue_frames(ax25); - } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_I: - if (!ax25_validate_nr(ax25, nr)) { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - break; - } - if (ax25->condition & AX25_COND_PEER_RX_BUSY) { - ax25_frames_acked(ax25, nr); - } else { - ax25_check_iframes_acked(ax25, nr); - } - if (ax25->condition & AX25_COND_OWN_RX_BUSY) { - if (pf) ax25_std_enquiry_response(ax25); - break; - } - if (ns == ax25->vr) { - ax25->vr = (ax25->vr + 1) % ax25->modulus; - queued = ax25_rx_iframe(ax25, skb); - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25->vr = ns; /* ax25->vr - 1 */ - ax25->condition &= ~AX25_COND_REJECT; - if (pf) { - ax25_std_enquiry_response(ax25); - } else { - if (!(ax25->condition & AX25_COND_ACK_PENDING)) { - ax25->condition |= AX25_COND_ACK_PENDING; - ax25_start_t2timer(ax25); - } - } - } else { - if (ax25->condition & AX25_COND_REJECT) { - if (pf) ax25_std_enquiry_response(ax25); - } else { - ax25->condition |= AX25_COND_REJECT; - ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE); - ax25->condition &= ~AX25_COND_ACK_PENDING; - } - } - break; - - case AX25_FRMR: - case AX25_ILLEGAL: - ax25_std_establish_data_link(ax25); - ax25->state = AX25_STATE_1; - break; - - default: - break; - } - - return queued; -} - -/* - * State machine for state 4, Timer Recovery State. - * The handling of the timer(s) is in file ax25_std_timer.c - * Handling of state 0 and connection release is in ax25.c. 
- */ -static int ax25_std_state4_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type) -{ - int queued = 0; - - switch (frametype) { - case AX25_SABM: - case AX25_SABME: - if (frametype == AX25_SABM) { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - } else { - ax25->modulus = AX25_EMODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; - } - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_start_t3timer(ax25); - ax25_start_idletimer(ax25); - ax25->condition = 0x00; - ax25->vs = 0; - ax25->va = 0; - ax25->vr = 0; - ax25->state = AX25_STATE_3; - ax25->n2count = 0; - ax25_requeue_frames(ax25); - break; - - case AX25_DISC: - ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); - ax25_disconnect(ax25, 0); - break; - - case AX25_DM: - ax25_disconnect(ax25, ECONNRESET); - break; - - case AX25_RR: - case AX25_RNR: - if (frametype == AX25_RR) - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - else - ax25->condition |= AX25_COND_PEER_RX_BUSY; - if (type == AX25_RESPONSE && pf) { - ax25_stop_t1timer(ax25); - ax25->n2count = 0; - if (ax25_validate_nr(ax25, nr)) { - ax25_frames_acked(ax25, nr); - if (ax25->vs == ax25->va) { - ax25_start_t3timer(ax25); - ax25->state = AX25_STATE_3; - } else { - ax25_requeue_frames(ax25); - } - } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - } - if (type == AX25_COMMAND && pf) - ax25_std_enquiry_response(ax25); - if (ax25_validate_nr(ax25, nr)) { - ax25_frames_acked(ax25, nr); - } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_REJ: - ax25->condition &= ~AX25_COND_PEER_RX_BUSY; - if (pf && type == AX25_RESPONSE) { - ax25_stop_t1timer(ax25); - ax25->n2count = 0; - if (ax25_validate_nr(ax25, nr)) { - ax25_frames_acked(ax25, nr); - if (ax25->vs == ax25->va) { - ax25_start_t3timer(ax25); - 
ax25->state = AX25_STATE_3; - } else { - ax25_requeue_frames(ax25); - } - } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - } - if (type == AX25_COMMAND && pf) - ax25_std_enquiry_response(ax25); - if (ax25_validate_nr(ax25, nr)) { - ax25_frames_acked(ax25, nr); - ax25_requeue_frames(ax25); - } else { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - } - break; - - case AX25_I: - if (!ax25_validate_nr(ax25, nr)) { - ax25_std_nr_error_recovery(ax25); - ax25->state = AX25_STATE_1; - break; - } - ax25_frames_acked(ax25, nr); - if (ax25->condition & AX25_COND_OWN_RX_BUSY) { - if (pf) - ax25_std_enquiry_response(ax25); - break; - } - if (ns == ax25->vr) { - ax25->vr = (ax25->vr + 1) % ax25->modulus; - queued = ax25_rx_iframe(ax25, skb); - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25->vr = ns; /* ax25->vr - 1 */ - ax25->condition &= ~AX25_COND_REJECT; - if (pf) { - ax25_std_enquiry_response(ax25); - } else { - if (!(ax25->condition & AX25_COND_ACK_PENDING)) { - ax25->condition |= AX25_COND_ACK_PENDING; - ax25_start_t2timer(ax25); - } - } - } else { - if (ax25->condition & AX25_COND_REJECT) { - if (pf) ax25_std_enquiry_response(ax25); - } else { - ax25->condition |= AX25_COND_REJECT; - ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE); - ax25->condition &= ~AX25_COND_ACK_PENDING; - } - } - break; - - case AX25_FRMR: - case AX25_ILLEGAL: - ax25_std_establish_data_link(ax25); - ax25->state = AX25_STATE_1; - break; - - default: - break; - } - - return queued; -} - -/* - * Higher level upcall for a LAPB frame - */ -int ax25_std_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type) -{ - int queued = 0, frametype, ns, nr, pf; - - frametype = ax25_decode(ax25, skb, &ns, &nr, &pf); - - switch (ax25->state) { - case AX25_STATE_1: - queued = ax25_std_state1_machine(ax25, skb, frametype, pf, type); - break; - case AX25_STATE_2: - queued = ax25_std_state2_machine(ax25, skb, frametype, pf, type); - break; - case 
AX25_STATE_3: - queued = ax25_std_state3_machine(ax25, skb, frametype, ns, nr, pf, type); - break; - case AX25_STATE_4: - queued = ax25_std_state4_machine(ax25, skb, frametype, ns, nr, pf, type); - break; - } - - ax25_kick(ax25); - - return queued; -} diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c deleted file mode 100644 index 4c36f1342558..000000000000 --- a/net/ax25/ax25_std_subr.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -/* - * The following routines are taken from page 170 of the 7th ARRL Computer - * Networking Conference paper, as is the whole state machine. 
- */ - -void ax25_std_nr_error_recovery(ax25_cb *ax25) -{ - ax25_std_establish_data_link(ax25); -} - -void ax25_std_establish_data_link(ax25_cb *ax25) -{ - ax25->condition = 0x00; - ax25->n2count = 0; - - if (ax25->modulus == AX25_MODULUS) - ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND); - else - ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND); - - ax25_calculate_t1(ax25); - ax25_stop_idletimer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_t2timer(ax25); - ax25_start_t1timer(ax25); -} - -void ax25_std_transmit_enquiry(ax25_cb *ax25) -{ - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_COMMAND); - else - ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_COMMAND); - - ax25->condition &= ~AX25_COND_ACK_PENDING; - - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); -} - -void ax25_std_enquiry_response(ax25_cb *ax25) -{ - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_RESPONSE); - else - ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_RESPONSE); - - ax25->condition &= ~AX25_COND_ACK_PENDING; -} - -void ax25_std_timeout_response(ax25_cb *ax25) -{ - if (ax25->condition & AX25_COND_OWN_RX_BUSY) - ax25_send_control(ax25, AX25_RNR, AX25_POLLOFF, AX25_RESPONSE); - else - ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE); - - ax25->condition &= ~AX25_COND_ACK_PENDING; -} diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c deleted file mode 100644 index b17da41210cb..000000000000 --- a/net/ax25/ax25_std_timer.c +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include 
<linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -void ax25_std_heartbeat_expiry(ax25_cb *ax25) -{ - struct sock *sk = ax25->sk; - - if (sk) - bh_lock_sock(sk); - - switch (ax25->state) { - case AX25_STATE_0: - case AX25_STATE_2: - /* Magic here: If we listen() and a new link dies before it - is accepted() it isn't 'dead' so doesn't get removed. */ - if (!sk || sock_flag(sk, SOCK_DESTROY) || - (sk->sk_state == TCP_LISTEN && - sock_flag(sk, SOCK_DEAD))) { - if (sk) { - sock_hold(sk); - ax25_destroy_socket(ax25); - bh_unlock_sock(sk); - /* Ungrab socket and destroy it */ - sock_put(sk); - } else - ax25_destroy_socket(ax25); - return; - } - break; - - case AX25_STATE_3: - case AX25_STATE_4: - /* - * Check the state of the receive buffer. 
- */ - if (sk != NULL) { - if (atomic_read(&sk->sk_rmem_alloc) < - (sk->sk_rcvbuf >> 1) && - (ax25->condition & AX25_COND_OWN_RX_BUSY)) { - ax25->condition &= ~AX25_COND_OWN_RX_BUSY; - ax25->condition &= ~AX25_COND_ACK_PENDING; - ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE); - break; - } - } - } - - if (sk) - bh_unlock_sock(sk); - - ax25_start_heartbeat(ax25); -} - -void ax25_std_t2timer_expiry(ax25_cb *ax25) -{ - if (ax25->condition & AX25_COND_ACK_PENDING) { - ax25->condition &= ~AX25_COND_ACK_PENDING; - ax25_std_timeout_response(ax25); - } -} - -void ax25_std_t3timer_expiry(ax25_cb *ax25) -{ - ax25->n2count = 0; - ax25_std_transmit_enquiry(ax25); - ax25->state = AX25_STATE_4; -} - -void ax25_std_idletimer_expiry(ax25_cb *ax25) -{ - ax25_clear_queues(ax25); - - ax25->n2count = 0; - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25->state = AX25_STATE_2; - - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - - if (ax25->sk != NULL) { - bh_lock_sock(ax25->sk); - ax25->sk->sk_state = TCP_CLOSE; - ax25->sk->sk_err = 0; - ax25->sk->sk_shutdown |= SEND_SHUTDOWN; - if (!sock_flag(ax25->sk, SOCK_DEAD)) { - ax25->sk->sk_state_change(ax25->sk); - sock_set_flag(ax25->sk, SOCK_DEAD); - } - bh_unlock_sock(ax25->sk); - } -} - -void ax25_std_t1timer_expiry(ax25_cb *ax25) -{ - switch (ax25->state) { - case AX25_STATE_1: - if (ax25->n2count == ax25->n2) { - if (ax25->modulus == AX25_MODULUS) { - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->modulus = AX25_MODULUS; - ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; - ax25->n2count = 0; - ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND); - } - } else { - ax25->n2count++; - if (ax25->modulus == AX25_MODULUS) - ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND); - else - ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND); - } - break; - - case AX25_STATE_2: - if (ax25->n2count 
== ax25->n2) { - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->n2count++; - ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - } - break; - - case AX25_STATE_3: - ax25->n2count = 1; - ax25_std_transmit_enquiry(ax25); - ax25->state = AX25_STATE_4; - break; - - case AX25_STATE_4: - if (ax25->n2count == ax25->n2) { - ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE); - ax25_disconnect(ax25, ETIMEDOUT); - return; - } else { - ax25->n2count++; - ax25_std_transmit_enquiry(ax25); - } - break; - } - - ax25_calculate_t1(ax25); - ax25_start_t1timer(ax25); -} diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c deleted file mode 100644 index bff4b203a893..000000000000 --- a/net/ax25/ax25_subr.c +++ /dev/null @@ -1,296 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -/* - * This routine purges all the queues of frames. 
- */ -void ax25_clear_queues(ax25_cb *ax25) -{ - skb_queue_purge(&ax25->write_queue); - skb_queue_purge(&ax25->ack_queue); - skb_queue_purge(&ax25->reseq_queue); - skb_queue_purge(&ax25->frag_queue); -} - -/* - * This routine purges the input queue of those frames that have been - * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the - * SDL diagram. - */ -void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) -{ - struct sk_buff *skb; - - /* - * Remove all the ack-ed frames from the ack queue. - */ - if (ax25->va != nr) { - while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) { - skb = skb_dequeue(&ax25->ack_queue); - kfree_skb(skb); - ax25->va = (ax25->va + 1) % ax25->modulus; - } - } -} - -void ax25_requeue_frames(ax25_cb *ax25) -{ - struct sk_buff *skb; - - /* - * Requeue all the un-ack-ed frames on the output queue to be picked - * up by ax25_kick called from the timer. This arrangement handles the - * possibility of an empty output queue. - */ - while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) - skb_queue_head(&ax25->write_queue, skb); -} - -/* - * Validate that the value of nr is between va and vs. Return true or - * false for testing. - */ -int ax25_validate_nr(ax25_cb *ax25, unsigned short nr) -{ - unsigned short vc = ax25->va; - - while (vc != ax25->vs) { - if (nr == vc) return 1; - vc = (vc + 1) % ax25->modulus; - } - - if (nr == ax25->vs) return 1; - - return 0; -} - -/* - * This routine is the centralised routine for parsing the control - * information for the different frame formats. 
- */ -int ax25_decode(ax25_cb *ax25, struct sk_buff *skb, int *ns, int *nr, int *pf) -{ - unsigned char *frame; - int frametype = AX25_ILLEGAL; - - frame = skb->data; - *ns = *nr = *pf = 0; - - if (ax25->modulus == AX25_MODULUS) { - if ((frame[0] & AX25_S) == 0) { - frametype = AX25_I; /* I frame - carries NR/NS/PF */ - *ns = (frame[0] >> 1) & 0x07; - *nr = (frame[0] >> 5) & 0x07; - *pf = frame[0] & AX25_PF; - } else if ((frame[0] & AX25_U) == 1) { /* S frame - take out PF/NR */ - frametype = frame[0] & 0x0F; - *nr = (frame[0] >> 5) & 0x07; - *pf = frame[0] & AX25_PF; - } else if ((frame[0] & AX25_U) == 3) { /* U frame - take out PF */ - frametype = frame[0] & ~AX25_PF; - *pf = frame[0] & AX25_PF; - } - skb_pull(skb, 1); - } else { - if ((frame[0] & AX25_S) == 0) { - frametype = AX25_I; /* I frame - carries NR/NS/PF */ - *ns = (frame[0] >> 1) & 0x7F; - *nr = (frame[1] >> 1) & 0x7F; - *pf = frame[1] & AX25_EPF; - skb_pull(skb, 2); - } else if ((frame[0] & AX25_U) == 1) { /* S frame - take out PF/NR */ - frametype = frame[0] & 0x0F; - *nr = (frame[1] >> 1) & 0x7F; - *pf = frame[1] & AX25_EPF; - skb_pull(skb, 2); - } else if ((frame[0] & AX25_U) == 3) { /* U frame - take out PF */ - frametype = frame[0] & ~AX25_PF; - *pf = frame[0] & AX25_PF; - skb_pull(skb, 1); - } - } - - return frametype; -} - -/* - * This routine is called when the HDLC layer internally generates a - * command or response for the remote machine ( eg. RR, UA etc. ). - * Only supervisory or unnumbered frames are processed. 
- */ -void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type) -{ - struct sk_buff *skb; - unsigned char *dptr; - - if ((skb = alloc_skb(ax25->ax25_dev->dev->hard_header_len + 2, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len); - - skb_reset_network_header(skb); - - /* Assume a response - address structure for DTE */ - if (ax25->modulus == AX25_MODULUS) { - dptr = skb_put(skb, 1); - *dptr = frametype; - *dptr |= (poll_bit) ? AX25_PF : 0; - if ((frametype & AX25_U) == AX25_S) /* S frames carry NR */ - *dptr |= (ax25->vr << 5); - } else { - if ((frametype & AX25_U) == AX25_U) { - dptr = skb_put(skb, 1); - *dptr = frametype; - *dptr |= (poll_bit) ? AX25_PF : 0; - } else { - dptr = skb_put(skb, 2); - dptr[0] = frametype; - dptr[1] = (ax25->vr << 1); - dptr[1] |= (poll_bit) ? AX25_EPF : 0; - } - } - - ax25_transmit_buffer(ax25, skb, type); -} - -/* - * Send a 'DM' to an unknown connection attempt, or an invalid caller. - * - * Note: src here is the sender, thus it's the target of the DM - */ -void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *dest, ax25_digi *digi) -{ - struct sk_buff *skb; - char *dptr; - ax25_digi retdigi; - - if (dev == NULL) - return; - - if ((skb = alloc_skb(dev->hard_header_len + 1, GFP_ATOMIC)) == NULL) - return; /* Next SABM will get DM'd */ - - skb_reserve(skb, dev->hard_header_len); - skb_reset_network_header(skb); - - ax25_digi_invert(digi, &retdigi); - - dptr = skb_put(skb, 1); - - *dptr = AX25_DM | AX25_PF; - - /* - * Do the address ourselves - */ - dptr = skb_push(skb, ax25_addr_size(digi)); - dptr += ax25_addr_build(dptr, dest, src, &retdigi, AX25_RESPONSE, AX25_MODULUS); - - ax25_queue_xmit(skb, dev); -} - -/* - * Exponential backoff for AX.25 - */ -void ax25_calculate_t1(ax25_cb *ax25) -{ - int n, t = 2; - - switch (ax25->backoff) { - case 0: - break; - - case 1: - t += 2 * ax25->n2count; - break; - - case 2: - for (n = 0; n < ax25->n2count; n++) - 
t *= 2; - if (t > 8) t = 8; - break; - } - - ax25->t1 = t * ax25->rtt; -} - -/* - * Calculate the Round Trip Time - */ -void ax25_calculate_rtt(ax25_cb *ax25) -{ - if (ax25->backoff == 0) - return; - - if (ax25_t1timer_running(ax25) && ax25->n2count == 0) - ax25->rtt = (9 * ax25->rtt + ax25->t1 - ax25_display_timer(&ax25->t1timer)) / 10; - - if (ax25->rtt < AX25_T1CLAMPLO) - ax25->rtt = AX25_T1CLAMPLO; - - if (ax25->rtt > AX25_T1CLAMPHI) - ax25->rtt = AX25_T1CLAMPHI; -} - -void ax25_disconnect(ax25_cb *ax25, int reason) -{ - ax25_clear_queues(ax25); - - if (reason == ENETUNREACH) { - timer_delete_sync(&ax25->timer); - timer_delete_sync(&ax25->t1timer); - timer_delete_sync(&ax25->t2timer); - timer_delete_sync(&ax25->t3timer); - timer_delete_sync(&ax25->idletimer); - } else { - if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); - } - - ax25->state = AX25_STATE_0; - - ax25_link_failed(ax25, reason); - - if (ax25->sk != NULL) { - local_bh_disable(); - bh_lock_sock(ax25->sk); - ax25->sk->sk_state = TCP_CLOSE; - ax25->sk->sk_err = reason; - ax25->sk->sk_shutdown |= SEND_SHUTDOWN; - if (!sock_flag(ax25->sk, SOCK_DEAD)) { - ax25->sk->sk_state_change(ax25->sk); - sock_set_flag(ax25->sk, SOCK_DEAD); - } - bh_unlock_sock(ax25->sk); - local_bh_enable(); - } -} diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c deleted file mode 100644 index a69bfbc8b679..000000000000 --- a/net/ax25/ax25_timer.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) - * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk) - * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) - * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) 
- * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/jiffies.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> - -static void ax25_heartbeat_expiry(struct timer_list *); -static void ax25_t1timer_expiry(struct timer_list *); -static void ax25_t2timer_expiry(struct timer_list *); -static void ax25_t3timer_expiry(struct timer_list *); -static void ax25_idletimer_expiry(struct timer_list *); - -void ax25_setup_timers(ax25_cb *ax25) -{ - timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0); - timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0); - timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0); - timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0); - timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0); -} - -void ax25_start_heartbeat(ax25_cb *ax25) -{ - mod_timer(&ax25->timer, jiffies + 5 * HZ); -} - -void ax25_start_t1timer(ax25_cb *ax25) -{ - mod_timer(&ax25->t1timer, jiffies + ax25->t1); -} - -void ax25_start_t2timer(ax25_cb *ax25) -{ - mod_timer(&ax25->t2timer, jiffies + ax25->t2); -} - -void ax25_start_t3timer(ax25_cb *ax25) -{ - if (ax25->t3 > 0) - mod_timer(&ax25->t3timer, jiffies + ax25->t3); - else - timer_delete(&ax25->t3timer); -} - -void ax25_start_idletimer(ax25_cb *ax25) -{ - if (ax25->idle > 0) - mod_timer(&ax25->idletimer, jiffies + ax25->idle); - else - timer_delete(&ax25->idletimer); -} - -void ax25_stop_heartbeat(ax25_cb *ax25) -{ - timer_delete(&ax25->timer); -} - -void ax25_stop_t1timer(ax25_cb *ax25) -{ - timer_delete(&ax25->t1timer); -} - -void ax25_stop_t2timer(ax25_cb 
*ax25) -{ - timer_delete(&ax25->t2timer); -} - -void ax25_stop_t3timer(ax25_cb *ax25) -{ - timer_delete(&ax25->t3timer); -} - -void ax25_stop_idletimer(ax25_cb *ax25) -{ - timer_delete(&ax25->idletimer); -} - -int ax25_t1timer_running(ax25_cb *ax25) -{ - return timer_pending(&ax25->t1timer); -} - -unsigned long ax25_display_timer(struct timer_list *timer) -{ - long delta = timer->expires - jiffies; - - if (!timer_pending(timer)) - return 0; - - return max(0L, delta); -} - -EXPORT_SYMBOL(ax25_display_timer); - -static void ax25_heartbeat_expiry(struct timer_list *t) -{ - int proto = AX25_PROTO_STD_SIMPLEX; - ax25_cb *ax25 = timer_container_of(ax25, t, timer); - - if (ax25->ax25_dev) - proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]; - - switch (proto) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_heartbeat_expiry(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (ax25->ax25_dev->dama.slave) - ax25_ds_heartbeat_expiry(ax25); - else - ax25_std_heartbeat_expiry(ax25); - break; -#endif - } -} - -static void ax25_t1timer_expiry(struct timer_list *t) -{ - ax25_cb *ax25 = timer_container_of(ax25, t, t1timer); - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_t1timer_expiry(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (!ax25->ax25_dev->dama.slave) - ax25_std_t1timer_expiry(ax25); - break; -#endif - } -} - -static void ax25_t2timer_expiry(struct timer_list *t) -{ - ax25_cb *ax25 = timer_container_of(ax25, t, t2timer); - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_t2timer_expiry(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (!ax25->ax25_dev->dama.slave) - ax25_std_t2timer_expiry(ax25); - break; -#endif - } -} - -static void ax25_t3timer_expiry(struct timer_list *t) -{ - 
ax25_cb *ax25 = timer_container_of(ax25, t, t3timer); - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_t3timer_expiry(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (ax25->ax25_dev->dama.slave) - ax25_ds_t3timer_expiry(ax25); - else - ax25_std_t3timer_expiry(ax25); - break; -#endif - } -} - -static void ax25_idletimer_expiry(struct timer_list *t) -{ - ax25_cb *ax25 = timer_container_of(ax25, t, idletimer); - - switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { - case AX25_PROTO_STD_SIMPLEX: - case AX25_PROTO_STD_DUPLEX: - ax25_std_idletimer_expiry(ax25); - break; - -#ifdef CONFIG_AX25_DAMA_SLAVE - case AX25_PROTO_DAMA_SLAVE: - if (ax25->ax25_dev->dama.slave) - ax25_ds_idletimer_expiry(ax25); - else - ax25_std_idletimer_expiry(ax25); - break; -#endif - } -} diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c deleted file mode 100644 index 159ce74273f0..000000000000 --- a/net/ax25/ax25_uid.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ - -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/list.h> -#include <linux/notifier.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/stat.h> -#include <linux/sysctl.h> -#include <linux/export.h> -#include <net/ip.h> -#include 
<net/arp.h> - -/* - * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. - */ - -static HLIST_HEAD(ax25_uid_list); -static DEFINE_RWLOCK(ax25_uid_lock); - -int ax25_uid_policy; - -EXPORT_SYMBOL(ax25_uid_policy); - -ax25_uid_assoc *ax25_findbyuid(kuid_t uid) -{ - ax25_uid_assoc *ax25_uid, *res = NULL; - - read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, &ax25_uid_list) { - if (uid_eq(ax25_uid->uid, uid)) { - ax25_uid_hold(ax25_uid); - res = ax25_uid; - break; - } - } - read_unlock(&ax25_uid_lock); - - return res; -} - -EXPORT_SYMBOL(ax25_findbyuid); - -int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) -{ - ax25_uid_assoc *ax25_uid; - ax25_uid_assoc *user; - unsigned long res; - - switch (cmd) { - case SIOCAX25GETUID: - res = -ENOENT; - read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, &ax25_uid_list) { - if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { - res = from_kuid_munged(current_user_ns(), ax25_uid->uid); - break; - } - } - read_unlock(&ax25_uid_lock); - - return res; - - case SIOCAX25ADDUID: - { - kuid_t sax25_kuid; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid); - if (!uid_valid(sax25_kuid)) - return -EINVAL; - user = ax25_findbyuid(sax25_kuid); - if (user) { - ax25_uid_put(user); - return -EEXIST; - } - if (sax->sax25_uid == 0) - return -EINVAL; - if ((ax25_uid = kmalloc_obj(*ax25_uid)) == NULL) - return -ENOMEM; - - refcount_set(&ax25_uid->refcount, 1); - ax25_uid->uid = sax25_kuid; - ax25_uid->call = sax->sax25_call; - - write_lock(&ax25_uid_lock); - hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list); - write_unlock(&ax25_uid_lock); - - return 0; - } - case SIOCAX25DELUID: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - ax25_uid = NULL; - write_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, &ax25_uid_list) { - if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) - break; - } - if (ax25_uid == NULL) { - 
write_unlock(&ax25_uid_lock); - return -ENOENT; - } - hlist_del_init(&ax25_uid->uid_node); - ax25_uid_put(ax25_uid); - write_unlock(&ax25_uid_lock); - - return 0; - - default: - return -EINVAL; - } - - return -EINVAL; /*NOTREACHED */ -} - -#ifdef CONFIG_PROC_FS - -static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ax25_uid_lock) -{ - read_lock(&ax25_uid_lock); - return seq_hlist_start_head(&ax25_uid_list, *pos); -} - -static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &ax25_uid_list, pos); -} - -static void ax25_uid_seq_stop(struct seq_file *seq, void *v) - __releases(ax25_uid_lock) -{ - read_unlock(&ax25_uid_lock); -} - -static int ax25_uid_seq_show(struct seq_file *seq, void *v) -{ - char buf[11]; - - if (v == SEQ_START_TOKEN) - seq_printf(seq, "Policy: %d\n", ax25_uid_policy); - else { - struct ax25_uid_assoc *pt; - - pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); - seq_printf(seq, "%6d %s\n", - from_kuid_munged(seq_user_ns(seq), pt->uid), - ax2asc(buf, &pt->call)); - } - return 0; -} - -const struct seq_operations ax25_uid_seqops = { - .start = ax25_uid_seq_start, - .next = ax25_uid_seq_next, - .stop = ax25_uid_seq_stop, - .show = ax25_uid_seq_show, -}; -#endif - -/* - * Free all memory associated with UID/Callsign structures. 
- */ -void __exit ax25_uid_free(void) -{ - ax25_uid_assoc *ax25_uid; - - write_lock(&ax25_uid_lock); -again: - ax25_uid_for_each(ax25_uid, &ax25_uid_list) { - hlist_del_init(&ax25_uid->uid_node); - ax25_uid_put(ax25_uid); - goto again; - } - write_unlock(&ax25_uid_lock); -} diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c deleted file mode 100644 index 68753aa30334..000000000000 --- a/net/ax25/sysctl_net_ax25.c +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) - */ -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/sysctl.h> -#include <linux/spinlock.h> -#include <net/ax25.h> - -static int min_ipdefmode[1], max_ipdefmode[] = {1}; -static int min_axdefmode[1], max_axdefmode[] = {1}; -static int min_backoff[1], max_backoff[] = {2}; -static int min_conmode[1], max_conmode[] = {2}; -static int min_window[] = {1}, max_window[] = {7}; -static int min_ewindow[] = {1}, max_ewindow[] = {63}; -static int min_t1[] = {1}, max_t1[] = {30000}; -static int min_t2[] = {1}, max_t2[] = {20000}; -static int min_t3[1], max_t3[] = {3600000}; -static int min_idle[1], max_idle[] = {65535000}; -static int min_n2[] = {1}, max_n2[] = {31}; -static int min_paclen[] = {1}, max_paclen[] = {512}; -static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; -#ifdef CONFIG_AX25_DAMA_SLAVE -static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; -#endif - -static const struct ctl_table ax25_param_table[] = { - { - .procname = "ip_default_mode", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_ipdefmode, - .extra2 = &max_ipdefmode - }, - { - .procname = "ax25_default_mode", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_axdefmode, - .extra2 = &max_axdefmode - }, - { - .procname = "backoff_type", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = 
proc_dointvec_minmax, - .extra1 = &min_backoff, - .extra2 = &max_backoff - }, - { - .procname = "connect_mode", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_conmode, - .extra2 = &max_conmode - }, - { - .procname = "standard_window_size", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_window, - .extra2 = &max_window - }, - { - .procname = "extended_window_size", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_ewindow, - .extra2 = &max_ewindow - }, - { - .procname = "t1_timeout", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t1, - .extra2 = &max_t1 - }, - { - .procname = "t2_timeout", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t2, - .extra2 = &max_t2 - }, - { - .procname = "t3_timeout", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t3, - .extra2 = &max_t3 - }, - { - .procname = "idle_timeout", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_idle, - .extra2 = &max_idle - }, - { - .procname = "maximum_retry_count", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_n2, - .extra2 = &max_n2 - }, - { - .procname = "maximum_packet_length", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_paclen, - .extra2 = &max_paclen - }, - { - .procname = "protocol", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_proto, - .extra2 = &max_proto - }, -#ifdef CONFIG_AX25_DAMA_SLAVE - { - .procname = "dama_slave_timeout", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_ds_timeout, - .extra2 = &max_ds_timeout - }, -#endif -}; - -int 
ax25_register_dev_sysctl(ax25_dev *ax25_dev) -{ - char path[sizeof("net/ax25/") + IFNAMSIZ]; - int k; - struct ctl_table *table; - - table = kmemdup(ax25_param_table, sizeof(ax25_param_table), GFP_KERNEL); - if (!table) - return -ENOMEM; - - BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES); - for (k = 0; k < AX25_MAX_VALUES; k++) - table[k].data = &ax25_dev->values[k]; - - snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name); - ax25_dev->sysheader = register_net_sysctl_sz(&init_net, path, table, - ARRAY_SIZE(ax25_param_table)); - if (!ax25_dev->sysheader) { - kfree(table); - return -ENOMEM; - } - return 0; -} - -void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev) -{ - struct ctl_table_header *header = ax25_dev->sysheader; - const struct ctl_table *table; - - if (header) { - ax25_dev->sysheader = NULL; - table = header->ctl_table_arg; - unregister_net_sysctl_table(header); - kfree(table); - } -} diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f28e9cbf8ad5..74ef7dc2b2f9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -173,19 +173,12 @@ free_orig_node_hash: static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, - struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh) + struct batadv_orig_node *orig_node) { struct batadv_neigh_node *neigh_node; neigh_node = batadv_neigh_node_get_or_create(orig_node, hard_iface, neigh_addr); - if (!neigh_node) - goto out; - - neigh_node->orig_node = orig_neigh; - -out: return neigh_node; } @@ -335,7 +328,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); const char *fwd_str; u8 packet_num; - s16 buff_pos; + int buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; u8 *packet_pos; @@ -907,6 +900,31 @@ static u8 batadv_iv_orig_ifinfo_sum(struct 
batadv_orig_node *orig_node, } /** + * batadv_iv_ogm_neigh_ifinfo_sum() - Get bcast_own sum for a last-hop neighbor + * @bat_priv: the bat priv with all the mesh interface information + * @neigh_node: last-hop neighbor of an originator + * + * Return: Number of replied (rebroadcasted) OGMs for the originator currently + * announced by the neighbor. Returns 0 if the neighbor's originator entry is + * not available anymore. + */ +static u8 batadv_iv_ogm_neigh_ifinfo_sum(struct batadv_priv *bat_priv, + const struct batadv_neigh_node *neigh_node) +{ + struct batadv_orig_node *orig_neigh; + u8 sum; + + orig_neigh = batadv_orig_hash_find(bat_priv, neigh_node->addr); + if (!orig_neigh) + return 0; + + sum = batadv_iv_orig_ifinfo_sum(orig_neigh, neigh_node->if_incoming); + batadv_orig_node_put(orig_neigh); + + return sum; +} + +/** * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator * @bat_priv: the bat priv with all the mesh interface information @@ -975,17 +993,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, } if (!neigh_node) { - struct batadv_orig_node *orig_tmp; - - orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); - if (!orig_tmp) - goto unlock; - neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp); - - batadv_orig_node_put(orig_tmp); + orig_node); if (!neigh_node) goto unlock; } else { @@ -1037,10 +1047,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, */ if (router_ifinfo && neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) { - sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node, - router->if_incoming); - sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node, - neigh_node->if_incoming); + sum_orig = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, router); + sum_neigh = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, + neigh_node); if (sum_orig >= sum_neigh) goto out; } @@ -1106,7 +1115,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node 
*orig_node, if (!neigh_node) neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, - orig_neigh_node, orig_neigh_node); if (!neigh_node) @@ -1303,6 +1311,32 @@ out: } /** + * batadv_orig_to_direct_router() - get direct next hop neighbor to an orig address + * @bat_priv: the bat priv with all the mesh interface information + * @orig_addr: the originator MAC address to search the best next hop router for + * @if_outgoing: the interface where the OGM should be sent to + * + * Return: A neighbor node which is the best router towards the given originator + * address. Bonding candidates are ignored. + */ +static struct batadv_neigh_node * +batadv_orig_to_direct_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; + + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + if (!orig_node) + return NULL; + + neigh_node = batadv_orig_router_get(orig_node, if_outgoing); + batadv_orig_node_put(orig_node); + + return neigh_node; +} + +/** * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing * interface * @skb: the skb containing the OGM @@ -1372,8 +1406,9 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { - router_router = batadv_orig_router_get(router->orig_node, - if_outgoing); + router_router = batadv_orig_to_direct_router(bat_priv, + router->addr, + if_outgoing); router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 51fe028b9088..cec11f1251d6 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -318,8 +318,8 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) if (claim->backbone_gw != backbone_gw) continue; - batadv_claim_put(claim); 
hlist_del_rcu(&claim->hash_entry); + batadv_claim_put(claim); } spin_unlock_bh(list_lock); } @@ -723,6 +723,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, if (unlikely(hash_added != 0)) { /* only local changes happened. */ + batadv_backbone_gw_put(backbone_gw); kfree(claim); return; } @@ -1288,6 +1289,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(claim, head, hash_entry) { + /* only purge claims not currently in the process of being released. + * Such claims could otherwise have a NULL-ptr backbone_gw set because + * they already went through batadv_claim_release() + */ + if (!kref_get_unless_zero(&claim->refcount)) + continue; + backbone_gw = batadv_bla_claim_get_backbone_gw(claim); if (now) goto purge_now; @@ -1313,6 +1321,7 @@ purge_now: claim->addr, claim->vid); skip: batadv_backbone_gw_put(backbone_gw); + batadv_claim_put(claim); } rcu_read_unlock(); } diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3a35aadd8b41..a4d33ee0fda5 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -249,6 +249,7 @@ void batadv_mesh_free(struct net_device *mesh_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); batadv_purge_outstanding_packets(bat_priv, NULL); + batadv_tp_stop_all(bat_priv); batadv_gw_node_free(bat_priv); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2e42f6b348c8..066c76113fc4 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -12,6 +12,7 @@ #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/completion.h> #include <linux/container_of.h> #include <linux/err.h> #include <linux/etherdevice.h> @@ -365,23 +366,38 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer - * @bat_priv: the bat priv with all the mesh interface information - * 
@tp_vars: the private data of the current TP meter session to cleanup + * batadv_tp_list_detach() - remove tp session from mesh session list once + * @tp_vars: the private data of the current TP meter session */ -static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, - struct batadv_tp_vars *tp_vars) +static void batadv_tp_list_detach(struct batadv_tp_vars *tp_vars) { - cancel_delayed_work(&tp_vars->finish_work); + bool detached = false; spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); - hlist_del_rcu(&tp_vars->list); + if (!hlist_unhashed(&tp_vars->list)) { + hlist_del_init_rcu(&tp_vars->list); + detached = true; + } spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); + if (!detached) + return; + + atomic_dec(&tp_vars->bat_priv->tp_num); + /* drop list reference */ batadv_tp_vars_put(tp_vars); +} - atomic_dec(&tp_vars->bat_priv->tp_num); +/** + * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer + * @tp_vars: the private data of the current TP meter session to cleanup + */ +static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) +{ + cancel_delayed_work_sync(&tp_vars->finish_work); + + batadv_tp_list_detach(tp_vars); /* kill the timer and remove its reference */ timer_delete_sync(&tp_vars->timer); @@ -886,7 +902,8 @@ out: batadv_orig_node_put(orig_node); batadv_tp_sender_end(bat_priv, tp_vars); - batadv_tp_sender_cleanup(bat_priv, tp_vars); + batadv_tp_sender_cleanup(tp_vars); + complete(&tp_vars->finished); batadv_tp_vars_put(tp_vars); @@ -918,7 +935,8 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) batadv_tp_vars_put(tp_vars); /* cleanup of failed tp meter variables */ - batadv_tp_sender_cleanup(bat_priv, tp_vars); + batadv_tp_sender_cleanup(tp_vars); + complete(&tp_vars->finished); return; } @@ -947,6 +965,13 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /* look for an already existing test towards this node */ spin_lock_bh(&bat_priv->tp_list_lock); + if 
(atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_DST_UNREACHABLE, + dst, bat_priv, session_cookie); + return; + } + tp_vars = batadv_tp_list_find(bat_priv, dst); if (tp_vars) { spin_unlock_bh(&bat_priv->tp_list_lock); @@ -969,6 +994,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC); if (!tp_vars) { + atomic_dec(&bat_priv->tp_num); spin_unlock_bh(&bat_priv->tp_list_lock); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: %s cannot allocate list elements\n", @@ -1017,6 +1043,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars->start_time = jiffies; init_waitqueue_head(&tp_vars->more_bytes); + init_completion(&tp_vars->finished); spin_lock_init(&tp_vars->unacked_lock); INIT_LIST_HEAD(&tp_vars->unacked_list); @@ -1119,14 +1146,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) "Shutting down for inactivity (more than %dms) from %pM\n", BATADV_TP_RECV_TIMEOUT, tp_vars->other_end); - spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); - hlist_del_rcu(&tp_vars->list); - spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); - - /* drop list reference */ - batadv_tp_vars_put(tp_vars); - - atomic_dec(&bat_priv->tp_num); + batadv_tp_list_detach(tp_vars); spin_lock_bh(&tp_vars->unacked_lock); list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { @@ -1329,9 +1349,12 @@ static struct batadv_tp_vars * batadv_tp_init_recv(struct batadv_priv *bat_priv, const struct batadv_icmp_tp_packet *icmp) { - struct batadv_tp_vars *tp_vars; + struct batadv_tp_vars *tp_vars = NULL; spin_lock_bh(&bat_priv->tp_list_lock); + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + goto out_unlock; + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, icmp->session); if (tp_vars) @@ -1344,8 +1367,10 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, } tp_vars = 
kmalloc_obj(*tp_vars, GFP_ATOMIC); - if (!tp_vars) + if (!tp_vars) { + atomic_dec(&bat_priv->tp_num); goto out_unlock; + } ether_addr_copy(tp_vars->other_end, icmp->orig); tp_vars->role = BATADV_TP_RECEIVER; @@ -1464,6 +1489,9 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_icmp_tp_packet *icmp; + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + goto out; + icmp = (struct batadv_icmp_tp_packet *)skb->data; switch (icmp->subtype) { @@ -1478,10 +1506,58 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) "Received unknown TP Metric packet type %u\n", icmp->subtype); } + +out: consume_skb(skb); } /** + * batadv_tp_stop_all() - stop all currently running tp meter sessions + * @bat_priv: the bat priv with all the mesh interface information + */ +void batadv_tp_stop_all(struct batadv_priv *bat_priv) +{ + struct batadv_tp_vars *tp_vars[BATADV_TP_MAX_NUM]; + struct batadv_tp_vars *tp_var; + size_t count = 0; + size_t i; + + spin_lock_bh(&bat_priv->tp_list_lock); + hlist_for_each_entry(tp_var, &bat_priv->tp_list, list) { + if (WARN_ON_ONCE(count >= BATADV_TP_MAX_NUM)) + break; + + if (!kref_get_unless_zero(&tp_var->refcount)) + continue; + + tp_vars[count++] = tp_var; + } + spin_unlock_bh(&bat_priv->tp_list_lock); + + for (i = 0; i < count; i++) { + tp_var = tp_vars[i]; + + switch (tp_var->role) { + case BATADV_TP_SENDER: + batadv_tp_sender_shutdown(tp_var, + BATADV_TP_REASON_CANCEL); + wake_up(&tp_var->more_bytes); + wait_for_completion(&tp_var->finished); + break; + case BATADV_TP_RECEIVER: + batadv_tp_list_detach(tp_var); + if (timer_shutdown_sync(&tp_var->timer)) + batadv_tp_vars_put(tp_var); + break; + } + + batadv_tp_vars_put(tp_var); + } + + synchronize_net(); +} + +/** * batadv_tp_meter_init() - initialize global tp_meter structures */ void __init batadv_tp_meter_init(void) diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index f0046d366eac..4e97cd10cd02 
100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -17,6 +17,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, u32 test_length, u32 *cookie); void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, u8 return_value); +void batadv_tp_stop_all(struct batadv_priv *bat_priv); void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb); #endif /* _NET_BATMAN_ADV_TP_METER_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8fc5fe0e9b05..daa06f421154 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -14,6 +14,7 @@ #include <linux/average.h> #include <linux/bitops.h> #include <linux/compiler.h> +#include <linux/completion.h> #include <linux/if.h> #include <linux/if_ether.h> #include <linux/kref.h> @@ -1328,6 +1329,9 @@ struct batadv_tp_vars { /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; + /** @finished: completion signaled when a sender thread exits */ + struct completion finished; + /** @test_length: test length in milliseconds */ u32 test_length; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 6b2b65a66700..ee6457d1a5ee 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -33,7 +33,6 @@ menuconfig BT HCI Device drivers (Interface to the hardware) RFCOMM Module (RFCOMM Protocol) BNEP Module (Bluetooth Network Encapsulation Protocol) - CMTP Module (CAPI Message Transport Protocol) HIDP Module (Human Interface Device Protocol) Say Y here to compile Bluetooth support into the kernel or say M to @@ -58,8 +57,6 @@ source "net/bluetooth/rfcomm/Kconfig" source "net/bluetooth/bnep/Kconfig" -source "net/bluetooth/cmtp/Kconfig" - source "net/bluetooth/hidp/Kconfig" config BT_LE diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a7eede7616d8..41049b280887 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_BT) += bluetooth.o 
obj-$(CONFIG_BT_RFCOMM) += rfcomm/ obj-$(CONFIG_BT_BNEP) += bnep/ -obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d4515c..853c8d7644b5 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -330,11 +330,18 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: - case BNEP_FILTER_NET_TYPE_SET: - /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ - if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) + case BNEP_FILTER_NET_TYPE_SET: { + u8 *hdr; + + /* Pull ctrl type (1 b) + len (2 b) */ + hdr = skb_pull_data(skb, 3); + if (!hdr) + goto badframe; + /* Pull data (len bytes); length is big-endian */ + if (!skb_pull(skb, get_unaligned_be16(&hdr[1]))) goto badframe; break; + } default: kfree_skb(skb); return 0; diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig deleted file mode 100644 index 34e923466236..000000000000 --- a/net/bluetooth/cmtp/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config BT_CMTP - tristate "CMTP protocol support (DEPRECATED)" - depends on BT_BREDR && ISDN_CAPI && DEPRECATED - help - CMTP (CAPI Message Transport Protocol) is a transport layer - for CAPI messages. CMTP is required for the Bluetooth Common - ISDN Access Profile. - - Say Y here to compile CMTP support into the kernel or say M to - compile it as module (cmtp). 
- diff --git a/net/bluetooth/cmtp/Makefile b/net/bluetooth/cmtp/Makefile deleted file mode 100644 index b2262ca97499..000000000000 --- a/net/bluetooth/cmtp/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the Linux Bluetooth CMTP layer -# - -obj-$(CONFIG_BT_CMTP) += cmtp.o - -cmtp-objs := core.o sock.o capi.o diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c deleted file mode 100644 index b95413bffa16..000000000000 --- a/net/bluetooth/cmtp/capi.c +++ /dev/null @@ -1,579 +0,0 @@ -/* - CMTP implementation for Linux Bluetooth stack (BlueZ). - Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. 
-*/ - -#include <linux/export.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/sched/signal.h> -#include <linux/slab.h> -#include <linux/poll.h> -#include <linux/fcntl.h> -#include <linux/skbuff.h> -#include <linux/socket.h> -#include <linux/ioctl.h> -#include <linux/file.h> -#include <linux/wait.h> -#include <linux/kthread.h> -#include <net/sock.h> - -#include <linux/isdn/capilli.h> -#include <linux/isdn/capicmd.h> -#include <linux/isdn/capiutil.h> - -#include "cmtp.h" - -#define CAPI_INTEROPERABILITY 0x20 - -#define CAPI_INTEROPERABILITY_REQ CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ) -#define CAPI_INTEROPERABILITY_CONF CAPICMD(CAPI_INTEROPERABILITY, CAPI_CONF) -#define CAPI_INTEROPERABILITY_IND CAPICMD(CAPI_INTEROPERABILITY, CAPI_IND) -#define CAPI_INTEROPERABILITY_RESP CAPICMD(CAPI_INTEROPERABILITY, CAPI_RESP) - -#define CAPI_INTEROPERABILITY_REQ_LEN (CAPI_MSG_BASELEN + 2) -#define CAPI_INTEROPERABILITY_CONF_LEN (CAPI_MSG_BASELEN + 4) -#define CAPI_INTEROPERABILITY_IND_LEN (CAPI_MSG_BASELEN + 2) -#define CAPI_INTEROPERABILITY_RESP_LEN (CAPI_MSG_BASELEN + 2) - -#define CAPI_FUNCTION_REGISTER 0 -#define CAPI_FUNCTION_RELEASE 1 -#define CAPI_FUNCTION_GET_PROFILE 2 -#define CAPI_FUNCTION_GET_MANUFACTURER 3 -#define CAPI_FUNCTION_GET_VERSION 4 -#define CAPI_FUNCTION_GET_SERIAL_NUMBER 5 -#define CAPI_FUNCTION_MANUFACTURER 6 -#define CAPI_FUNCTION_LOOPBACK 7 - - -#define CMTP_MSGNUM 1 -#define CMTP_APPLID 2 -#define CMTP_MAPPING 3 - -static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl) -{ - struct cmtp_application *app = kzalloc_obj(*app); - - BT_DBG("session %p application %p appl %u", session, app, appl); - - if (!app) - return NULL; - - app->state = BT_OPEN; - app->appl = appl; - - list_add_tail(&app->list, &session->applications); - - return app; -} - -static void cmtp_application_del(struct cmtp_session *session, struct 
cmtp_application *app) -{ - BT_DBG("session %p application %p", session, app); - - if (app) { - list_del(&app->list); - kfree(app); - } -} - -static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value) -{ - struct cmtp_application *app; - - list_for_each_entry(app, &session->applications, list) { - switch (pattern) { - case CMTP_MSGNUM: - if (app->msgnum == value) - return app; - break; - case CMTP_APPLID: - if (app->appl == value) - return app; - break; - case CMTP_MAPPING: - if (app->mapping == value) - return app; - break; - } - } - - return NULL; -} - -static int cmtp_msgnum_get(struct cmtp_session *session) -{ - session->msgnum++; - - if ((session->msgnum & 0xff) > 200) - session->msgnum = CMTP_INITIAL_MSGNUM + 1; - - return session->msgnum; -} - -static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb) -{ - struct cmtp_scb *scb = (void *) skb->cb; - - BT_DBG("session %p skb %p len %u", session, skb, skb->len); - - scb->id = -1; - scb->data = (CAPIMSG_COMMAND(skb->data) == CAPI_DATA_B3); - - skb_queue_tail(&session->transmit, skb); - - wake_up_interruptible(sk_sleep(session->sock->sk)); -} - -static void cmtp_send_interopmsg(struct cmtp_session *session, - __u8 subcmd, __u16 appl, __u16 msgnum, - __u16 function, unsigned char *buf, int len) -{ - struct sk_buff *skb; - unsigned char *s; - - BT_DBG("session %p subcmd 0x%02x appl %u msgnum %u", session, subcmd, appl, msgnum); - - skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC); - if (!skb) { - BT_ERR("Can't allocate memory for interoperability packet"); - return; - } - - s = skb_put(skb, CAPI_MSG_BASELEN + 6 + len); - - capimsg_setu16(s, 0, CAPI_MSG_BASELEN + 6 + len); - capimsg_setu16(s, 2, appl); - capimsg_setu8 (s, 4, CAPI_INTEROPERABILITY); - capimsg_setu8 (s, 5, subcmd); - capimsg_setu16(s, 6, msgnum); - - /* Interoperability selector (Bluetooth Device Management) */ - capimsg_setu16(s, 8, 0x0001); - - capimsg_setu8 (s, 10, 3 + 
len); - capimsg_setu16(s, 11, function); - capimsg_setu8 (s, 13, len); - - if (len > 0) - memcpy(s + 14, buf, len); - - cmtp_send_capimsg(session, skb); -} - -static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *skb) -{ - struct capi_ctr *ctrl = &session->ctrl; - struct cmtp_application *application; - __u16 appl, msgnum, func, info; - __u32 controller; - - BT_DBG("session %p skb %p len %u", session, skb, skb->len); - - switch (CAPIMSG_SUBCOMMAND(skb->data)) { - case CAPI_CONF: - if (skb->len < CAPI_MSG_BASELEN + 10) - break; - - func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 5); - info = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 8); - - switch (func) { - case CAPI_FUNCTION_REGISTER: - msgnum = CAPIMSG_MSGID(skb->data); - - application = cmtp_application_get(session, CMTP_MSGNUM, msgnum); - if (application) { - application->state = BT_CONNECTED; - application->msgnum = 0; - application->mapping = CAPIMSG_APPID(skb->data); - wake_up_interruptible(&session->wait); - } - - break; - - case CAPI_FUNCTION_RELEASE: - appl = CAPIMSG_APPID(skb->data); - - application = cmtp_application_get(session, CMTP_MAPPING, appl); - if (application) { - application->state = BT_CLOSED; - application->msgnum = 0; - wake_up_interruptible(&session->wait); - } - - break; - - case CAPI_FUNCTION_GET_PROFILE: - if (skb->len < CAPI_MSG_BASELEN + 11 + sizeof(capi_profile)) - break; - - controller = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 11); - msgnum = CAPIMSG_MSGID(skb->data); - - if (!info && (msgnum == CMTP_INITIAL_MSGNUM)) { - session->ncontroller = controller; - wake_up_interruptible(&session->wait); - break; - } - - if (!info && ctrl) { - memcpy(&ctrl->profile, - skb->data + CAPI_MSG_BASELEN + 11, - sizeof(capi_profile)); - session->state = BT_CONNECTED; - capi_ctr_ready(ctrl); - } - - break; - - case CAPI_FUNCTION_GET_MANUFACTURER: - if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 14) - strscpy_pad(ctrl->manu, - skb->data + CAPI_MSG_BASELEN + 15, - 
skb->data[CAPI_MSG_BASELEN + 14]); - break; - - case CAPI_FUNCTION_GET_VERSION: - if (skb->len < CAPI_MSG_BASELEN + 32) - break; - - if (!info && ctrl) { - ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16); - ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20); - ctrl->version.majormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 24); - ctrl->version.minormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 28); - } - - break; - - case CAPI_FUNCTION_GET_SERIAL_NUMBER: - if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 16) - strscpy_pad(ctrl->serial, - skb->data + CAPI_MSG_BASELEN + 17, - skb->data[CAPI_MSG_BASELEN + 16]); - break; - } - - break; - - case CAPI_IND: - if (skb->len < CAPI_MSG_BASELEN + 6) - break; - - func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 3); - - if (func == CAPI_FUNCTION_LOOPBACK) { - int len = min_t(uint, skb->len - CAPI_MSG_BASELEN - 6, - skb->data[CAPI_MSG_BASELEN + 5]); - appl = CAPIMSG_APPID(skb->data); - msgnum = CAPIMSG_MSGID(skb->data); - cmtp_send_interopmsg(session, CAPI_RESP, appl, msgnum, func, - skb->data + CAPI_MSG_BASELEN + 6, len); - } - - break; - } - - kfree_skb(skb); -} - -void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb) -{ - struct capi_ctr *ctrl = &session->ctrl; - struct cmtp_application *application; - __u16 appl; - __u32 contr; - - BT_DBG("session %p skb %p len %u", session, skb, skb->len); - - if (skb->len < CAPI_MSG_BASELEN) - return; - - if (CAPIMSG_COMMAND(skb->data) == CAPI_INTEROPERABILITY) { - cmtp_recv_interopmsg(session, skb); - return; - } - - if (session->flags & BIT(CMTP_LOOPBACK)) { - kfree_skb(skb); - return; - } - - appl = CAPIMSG_APPID(skb->data); - contr = CAPIMSG_CONTROL(skb->data); - - application = cmtp_application_get(session, CMTP_MAPPING, appl); - if (application) { - appl = application->appl; - CAPIMSG_SETAPPID(skb->data, appl); - } else { - BT_ERR("Can't find application with id %u", appl); - kfree_skb(skb); 
- return; - } - - if ((contr & 0x7f) == 0x01) { - contr = (contr & 0xffffff80) | session->num; - CAPIMSG_SETCONTROL(skb->data, contr); - } - - capi_ctr_handle_message(ctrl, appl, skb); -} - -static int cmtp_load_firmware(struct capi_ctr *ctrl, capiloaddata *data) -{ - BT_DBG("ctrl %p data %p", ctrl, data); - - return 0; -} - -static void cmtp_reset_ctr(struct capi_ctr *ctrl) -{ - struct cmtp_session *session = ctrl->driverdata; - - BT_DBG("ctrl %p", ctrl); - - capi_ctr_down(ctrl); - - atomic_inc(&session->terminate); - wake_up_process(session->task); -} - -static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp) -{ - DECLARE_WAITQUEUE(wait, current); - struct cmtp_session *session = ctrl->driverdata; - struct cmtp_application *application; - unsigned long timeo = CMTP_INTEROP_TIMEOUT; - unsigned char buf[8]; - int err = 0, nconn, want = rp->level3cnt; - - BT_DBG("ctrl %p appl %u level3cnt %u datablkcnt %u datablklen %u", - ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen); - - application = cmtp_application_add(session, appl); - if (!application) { - BT_ERR("Can't allocate memory for new application"); - return; - } - - if (want < 0) - nconn = ctrl->profile.nbchannel * -want; - else - nconn = want; - - if (nconn == 0) - nconn = ctrl->profile.nbchannel; - - capimsg_setu16(buf, 0, nconn); - capimsg_setu16(buf, 2, rp->datablkcnt); - capimsg_setu16(buf, 4, rp->datablklen); - - application->state = BT_CONFIG; - application->msgnum = cmtp_msgnum_get(session); - - cmtp_send_interopmsg(session, CAPI_REQ, 0x0000, application->msgnum, - CAPI_FUNCTION_REGISTER, buf, 6); - - add_wait_queue(&session->wait, &wait); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (application->state == BT_CLOSED) { - err = -application->err; - break; - } - - if (application->state == BT_CONNECTED) - break; - - if (signal_pending(current)) { - err = -EINTR; - break; - } - - timeo = 
schedule_timeout(timeo); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&session->wait, &wait); - - if (err) { - cmtp_application_del(session, application); - return; - } -} - -static void cmtp_release_appl(struct capi_ctr *ctrl, __u16 appl) -{ - struct cmtp_session *session = ctrl->driverdata; - struct cmtp_application *application; - - BT_DBG("ctrl %p appl %u", ctrl, appl); - - application = cmtp_application_get(session, CMTP_APPLID, appl); - if (!application) { - BT_ERR("Can't find application"); - return; - } - - application->msgnum = cmtp_msgnum_get(session); - - cmtp_send_interopmsg(session, CAPI_REQ, application->mapping, application->msgnum, - CAPI_FUNCTION_RELEASE, NULL, 0); - - wait_event_interruptible_timeout(session->wait, - (application->state == BT_CLOSED), CMTP_INTEROP_TIMEOUT); - - cmtp_application_del(session, application); -} - -static u16 cmtp_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) -{ - struct cmtp_session *session = ctrl->driverdata; - struct cmtp_application *application; - __u16 appl; - __u32 contr; - - BT_DBG("ctrl %p skb %p", ctrl, skb); - - appl = CAPIMSG_APPID(skb->data); - contr = CAPIMSG_CONTROL(skb->data); - - application = cmtp_application_get(session, CMTP_APPLID, appl); - if ((!application) || (application->state != BT_CONNECTED)) { - BT_ERR("Can't find application with id %u", appl); - return CAPI_ILLAPPNR; - } - - CAPIMSG_SETAPPID(skb->data, application->mapping); - - if ((contr & 0x7f) == session->num) { - contr = (contr & 0xffffff80) | 0x01; - CAPIMSG_SETCONTROL(skb->data, contr); - } - - cmtp_send_capimsg(session, skb); - - return CAPI_NOERROR; -} - -static char *cmtp_procinfo(struct capi_ctr *ctrl) -{ - return "CAPI Message Transport Protocol"; -} - -static int cmtp_proc_show(struct seq_file *m, void *v) -{ - struct capi_ctr *ctrl = m->private; - struct cmtp_session *session = ctrl->driverdata; - struct cmtp_application *app; - - seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); - seq_printf(m, "addr 
%s\n", session->name); - seq_printf(m, "ctrl %d\n", session->num); - - list_for_each_entry(app, &session->applications, list) { - seq_printf(m, "appl %u -> %u\n", app->appl, app->mapping); - } - - return 0; -} - -int cmtp_attach_device(struct cmtp_session *session) -{ - unsigned char buf[4]; - long ret; - - BT_DBG("session %p", session); - - capimsg_setu32(buf, 0, 0); - - cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, CMTP_INITIAL_MSGNUM, - CAPI_FUNCTION_GET_PROFILE, buf, 4); - - ret = wait_event_interruptible_timeout(session->wait, - session->ncontroller, CMTP_INTEROP_TIMEOUT); - - BT_INFO("Found %d CAPI controller(s) on device %s", session->ncontroller, session->name); - - if (!ret) - return -ETIMEDOUT; - - if (!session->ncontroller) - return -ENODEV; - - if (session->ncontroller > 1) - BT_INFO("Setting up only CAPI controller 1"); - - session->ctrl.owner = THIS_MODULE; - session->ctrl.driverdata = session; - strcpy(session->ctrl.name, session->name); - - session->ctrl.driver_name = "cmtp"; - session->ctrl.load_firmware = cmtp_load_firmware; - session->ctrl.reset_ctr = cmtp_reset_ctr; - session->ctrl.register_appl = cmtp_register_appl; - session->ctrl.release_appl = cmtp_release_appl; - session->ctrl.send_message = cmtp_send_message; - - session->ctrl.procinfo = cmtp_procinfo; - session->ctrl.proc_show = cmtp_proc_show; - - if (attach_capi_ctr(&session->ctrl) < 0) { - BT_ERR("Can't attach new controller"); - return -EBUSY; - } - - session->num = session->ctrl.cnr; - - BT_DBG("session %p num %d", session, session->num); - - capimsg_setu32(buf, 0, 1); - - cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session), - CAPI_FUNCTION_GET_MANUFACTURER, buf, 4); - - cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session), - CAPI_FUNCTION_GET_VERSION, buf, 4); - - cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session), - CAPI_FUNCTION_GET_SERIAL_NUMBER, buf, 4); - - cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, 
cmtp_msgnum_get(session), - CAPI_FUNCTION_GET_PROFILE, buf, 4); - - return 0; -} - -void cmtp_detach_device(struct cmtp_session *session) -{ - BT_DBG("session %p", session); - - detach_capi_ctr(&session->ctrl); -} diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h deleted file mode 100644 index f6b9dc4e408f..000000000000 --- a/net/bluetooth/cmtp/cmtp.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - CMTP implementation for Linux Bluetooth stack (BlueZ). - Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. 
-*/ - -#ifndef __CMTP_H -#define __CMTP_H - -#include <linux/types.h> -#include <net/bluetooth/bluetooth.h> - -#define BTNAMSIZ 21 - -/* CMTP ioctl defines */ -#define CMTPCONNADD _IOW('C', 200, int) -#define CMTPCONNDEL _IOW('C', 201, int) -#define CMTPGETCONNLIST _IOR('C', 210, int) -#define CMTPGETCONNINFO _IOR('C', 211, int) - -#define CMTP_LOOPBACK 0 - -struct cmtp_connadd_req { - int sock; /* Connected socket */ - __u32 flags; -}; - -struct cmtp_conndel_req { - bdaddr_t bdaddr; - __u32 flags; -}; - -struct cmtp_conninfo { - bdaddr_t bdaddr; - __u32 flags; - __u16 state; - int num; -}; - -struct cmtp_connlist_req { - __u32 cnum; - struct cmtp_conninfo __user *ci; -}; - -int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock); -int cmtp_del_connection(struct cmtp_conndel_req *req); -int cmtp_get_connlist(struct cmtp_connlist_req *req); -int cmtp_get_conninfo(struct cmtp_conninfo *ci); - -/* CMTP session defines */ -#define CMTP_INTEROP_TIMEOUT (HZ * 5) -#define CMTP_INITIAL_MSGNUM 0xff00 - -struct cmtp_session { - struct list_head list; - - struct socket *sock; - - bdaddr_t bdaddr; - - unsigned long state; - unsigned long flags; - - uint mtu; - - char name[BTNAMSIZ]; - - atomic_t terminate; - struct task_struct *task; - - wait_queue_head_t wait; - - int ncontroller; - int num; - struct capi_ctr ctrl; - - struct list_head applications; - - unsigned long blockids; - int msgnum; - - struct sk_buff_head transmit; - - struct sk_buff *reassembly[16]; -}; - -struct cmtp_application { - struct list_head list; - - unsigned long state; - int err; - - __u16 appl; - __u16 mapping; - - __u16 msgnum; -}; - -struct cmtp_scb { - int id; - int data; -}; - -int cmtp_attach_device(struct cmtp_session *session); -void cmtp_detach_device(struct cmtp_session *session); - -void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb); - -/* CMTP init defines */ -int cmtp_init_sockets(void); -void cmtp_cleanup_sockets(void); - -#endif /* __CMTP_H */ diff 
--git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c deleted file mode 100644 index 261aeeda3236..000000000000 --- a/net/bluetooth/cmtp/core.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - CMTP implementation for Linux Bluetooth stack (BlueZ). - Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. 
-*/ - -#include <linux/module.h> - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/poll.h> -#include <linux/fcntl.h> -#include <linux/freezer.h> -#include <linux/skbuff.h> -#include <linux/socket.h> -#include <linux/ioctl.h> -#include <linux/file.h> -#include <linux/init.h> -#include <linux/kthread.h> -#include <net/sock.h> - -#include <linux/isdn/capilli.h> - -#include <net/bluetooth/bluetooth.h> -#include <net/bluetooth/l2cap.h> - -#include "cmtp.h" - -#define VERSION "1.0" - -static DECLARE_RWSEM(cmtp_session_sem); -static LIST_HEAD(cmtp_session_list); - -static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr) -{ - struct cmtp_session *session; - - BT_DBG(""); - - list_for_each_entry(session, &cmtp_session_list, list) - if (!bacmp(bdaddr, &session->bdaddr)) - return session; - - return NULL; -} - -static void __cmtp_link_session(struct cmtp_session *session) -{ - list_add(&session->list, &cmtp_session_list); -} - -static void __cmtp_unlink_session(struct cmtp_session *session) -{ - list_del(&session->list); -} - -static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) -{ - u32 valid_flags = BIT(CMTP_LOOPBACK); - memset(ci, 0, sizeof(*ci)); - bacpy(&ci->bdaddr, &session->bdaddr); - - ci->flags = session->flags & valid_flags; - ci->state = session->state; - - ci->num = session->num; -} - - -static inline int cmtp_alloc_block_id(struct cmtp_session *session) -{ - int i, id = -1; - - for (i = 0; i < 16; i++) - if (!test_and_set_bit(i, &session->blockids)) { - id = i; - break; - } - - return id; -} - -static inline void cmtp_free_block_id(struct cmtp_session *session, int id) -{ - clear_bit(id, &session->blockids); -} - -static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const unsigned char *buf, int count) -{ - struct sk_buff *skb = session->reassembly[id], *nskb; - int size; - - BT_DBG("session %p buf %p 
count %d", session, buf, count); - - size = (skb) ? skb->len + count : count; - - nskb = alloc_skb(size, GFP_ATOMIC); - if (!nskb) { - BT_ERR("Can't allocate memory for CAPI message"); - return; - } - - if (skb && (skb->len > 0)) - skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len); - - skb_put_data(nskb, buf, count); - - session->reassembly[id] = nskb; - - kfree_skb(skb); -} - -static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb) -{ - __u8 hdr, hdrlen, id; - __u16 len; - - BT_DBG("session %p skb %p len %d", session, skb, skb->len); - - while (skb->len > 0) { - hdr = skb->data[0]; - - switch (hdr & 0xc0) { - case 0x40: - hdrlen = 2; - len = skb->data[1]; - break; - case 0x80: - hdrlen = 3; - len = skb->data[1] | (skb->data[2] << 8); - break; - default: - hdrlen = 1; - len = 0; - break; - } - - id = (hdr & 0x3c) >> 2; - - BT_DBG("hdr 0x%02x hdrlen %d len %d id %d", hdr, hdrlen, len, id); - - if (hdrlen + len > skb->len) { - BT_ERR("Wrong size or header information in CMTP frame"); - break; - } - - if (len == 0) { - skb_pull(skb, hdrlen); - continue; - } - - switch (hdr & 0x03) { - case 0x00: - cmtp_add_msgpart(session, id, skb->data + hdrlen, len); - cmtp_recv_capimsg(session, session->reassembly[id]); - session->reassembly[id] = NULL; - break; - case 0x01: - cmtp_add_msgpart(session, id, skb->data + hdrlen, len); - break; - default: - kfree_skb(session->reassembly[id]); - session->reassembly[id] = NULL; - break; - } - - skb_pull(skb, hdrlen + len); - } - - kfree_skb(skb); - return 0; -} - -static int cmtp_send_frame(struct cmtp_session *session, unsigned char *data, int len) -{ - struct socket *sock = session->sock; - struct kvec iv = { data, len }; - struct msghdr msg; - - BT_DBG("session %p data %p len %d", session, data, len); - - if (!len) - return 0; - - memset(&msg, 0, sizeof(msg)); - - return kernel_sendmsg(sock, &msg, &iv, 1, len); -} - -static void cmtp_process_transmit(struct cmtp_session *session) -{ - 
struct sk_buff *skb, *nskb; - unsigned char *hdr; - unsigned int size, tail; - - BT_DBG("session %p", session); - - nskb = alloc_skb(session->mtu, GFP_ATOMIC); - if (!nskb) { - BT_ERR("Can't allocate memory for new frame"); - return; - } - - while ((skb = skb_dequeue(&session->transmit))) { - struct cmtp_scb *scb = (void *) skb->cb; - - tail = session->mtu - nskb->len; - if (tail < 5) { - cmtp_send_frame(session, nskb->data, nskb->len); - skb_trim(nskb, 0); - tail = session->mtu; - } - - size = min_t(uint, ((tail < 258) ? (tail - 2) : (tail - 3)), skb->len); - - if (scb->id < 0) { - scb->id = cmtp_alloc_block_id(session); - if (scb->id < 0) { - skb_queue_head(&session->transmit, skb); - break; - } - } - - if (size < 256) { - hdr = skb_put(nskb, 2); - hdr[0] = 0x40 - | ((scb->id << 2) & 0x3c) - | ((skb->len == size) ? 0x00 : 0x01); - hdr[1] = size; - } else { - hdr = skb_put(nskb, 3); - hdr[0] = 0x80 - | ((scb->id << 2) & 0x3c) - | ((skb->len == size) ? 0x00 : 0x01); - hdr[1] = size & 0xff; - hdr[2] = size >> 8; - } - - skb_copy_from_linear_data(skb, skb_put(nskb, size), size); - skb_pull(skb, size); - - if (skb->len > 0) { - skb_queue_head(&session->transmit, skb); - } else { - cmtp_free_block_id(session, scb->id); - if (scb->data) { - cmtp_send_frame(session, nskb->data, nskb->len); - skb_trim(nskb, 0); - } - kfree_skb(skb); - } - } - - cmtp_send_frame(session, nskb->data, nskb->len); - - kfree_skb(nskb); -} - -static int cmtp_session(void *arg) -{ - struct cmtp_session *session = arg; - struct sock *sk = session->sock->sk; - struct sk_buff *skb; - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - BT_DBG("session %p", session); - - set_user_nice(current, -15); - - add_wait_queue(sk_sleep(sk), &wait); - while (1) { - if (atomic_read(&session->terminate)) - break; - if (sk->sk_state != BT_CONNECTED) - break; - - while ((skb = skb_dequeue(&sk->sk_receive_queue))) { - skb_orphan(skb); - if (!skb_linearize(skb)) - cmtp_recv_frame(session, skb); - else - kfree_skb(skb); 
- } - - cmtp_process_transmit(session); - - /* - * wait_woken() performs the necessary memory barriers - * for us; see the header comment for this primitive. - */ - wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - } - remove_wait_queue(sk_sleep(sk), &wait); - - down_write(&cmtp_session_sem); - - if (!(session->flags & BIT(CMTP_LOOPBACK))) - cmtp_detach_device(session); - - fput(session->sock->file); - - __cmtp_unlink_session(session); - - up_write(&cmtp_session_sem); - - kfree(session); - module_put_and_kthread_exit(0); - return 0; -} - -int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) -{ - u32 valid_flags = BIT(CMTP_LOOPBACK); - struct cmtp_session *session, *s; - int i, err; - - BT_DBG(""); - - if (!l2cap_is_socket(sock)) - return -EBADFD; - - if (req->flags & ~valid_flags) - return -EINVAL; - - session = kzalloc_obj(struct cmtp_session); - if (!session) - return -ENOMEM; - - down_write(&cmtp_session_sem); - - s = __cmtp_get_session(&l2cap_pi(sock->sk)->chan->dst); - if (s && s->state == BT_CONNECTED) { - err = -EEXIST; - goto failed; - } - - bacpy(&session->bdaddr, &l2cap_pi(sock->sk)->chan->dst); - - session->mtu = min_t(uint, l2cap_pi(sock->sk)->chan->omtu, - l2cap_pi(sock->sk)->chan->imtu); - - BT_DBG("mtu %d", session->mtu); - - sprintf(session->name, "%pMR", &session->bdaddr); - - session->sock = sock; - session->state = BT_CONFIG; - - init_waitqueue_head(&session->wait); - - session->msgnum = CMTP_INITIAL_MSGNUM; - - INIT_LIST_HEAD(&session->applications); - - skb_queue_head_init(&session->transmit); - - for (i = 0; i < 16; i++) - session->reassembly[i] = NULL; - - session->flags = req->flags; - - __cmtp_link_session(session); - - __module_get(THIS_MODULE); - session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d", - session->num); - if (IS_ERR(session->task)) { - module_put(THIS_MODULE); - err = PTR_ERR(session->task); - goto unlink; - } - - if (!(session->flags & BIT(CMTP_LOOPBACK))) { - err = 
cmtp_attach_device(session); - if (err < 0) { - /* Caller will call fput in case of failure, and so - * will cmtp_session kthread. - */ - get_file(session->sock->file); - - atomic_inc(&session->terminate); - wake_up_interruptible(sk_sleep(session->sock->sk)); - up_write(&cmtp_session_sem); - return err; - } - } - - up_write(&cmtp_session_sem); - return 0; - -unlink: - __cmtp_unlink_session(session); - -failed: - up_write(&cmtp_session_sem); - kfree(session); - return err; -} - -int cmtp_del_connection(struct cmtp_conndel_req *req) -{ - u32 valid_flags = 0; - struct cmtp_session *session; - int err = 0; - - BT_DBG(""); - - if (req->flags & ~valid_flags) - return -EINVAL; - - down_read(&cmtp_session_sem); - - session = __cmtp_get_session(&req->bdaddr); - if (session) { - /* Flush the transmit queue */ - skb_queue_purge(&session->transmit); - - /* Stop session thread */ - atomic_inc(&session->terminate); - - /* - * See the comment preceding the call to wait_woken() - * in cmtp_session(). - */ - wake_up_interruptible(sk_sleep(session->sock->sk)); - } else - err = -ENOENT; - - up_read(&cmtp_session_sem); - return err; -} - -int cmtp_get_connlist(struct cmtp_connlist_req *req) -{ - struct cmtp_session *session; - int err = 0, n = 0; - - BT_DBG(""); - - down_read(&cmtp_session_sem); - - list_for_each_entry(session, &cmtp_session_list, list) { - struct cmtp_conninfo ci; - - __cmtp_copy_session(session, &ci); - - if (copy_to_user(req->ci, &ci, sizeof(ci))) { - err = -EFAULT; - break; - } - - if (++n >= req->cnum) - break; - - req->ci++; - } - req->cnum = n; - - up_read(&cmtp_session_sem); - return err; -} - -int cmtp_get_conninfo(struct cmtp_conninfo *ci) -{ - struct cmtp_session *session; - int err = 0; - - down_read(&cmtp_session_sem); - - session = __cmtp_get_session(&ci->bdaddr); - if (session) - __cmtp_copy_session(session, ci); - else - err = -ENOENT; - - up_read(&cmtp_session_sem); - return err; -} - - -static int __init cmtp_init(void) -{ - BT_INFO("CMTP (CAPI 
Emulation) ver %s", VERSION); - - return cmtp_init_sockets(); -} - -static void __exit cmtp_exit(void) -{ - cmtp_cleanup_sockets(); -} - -module_init(cmtp_init); -module_exit(cmtp_exit); - -MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); -MODULE_DESCRIPTION("Bluetooth CMTP ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("bt-proto-5"); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c deleted file mode 100644 index 96d49d9fae96..000000000000 --- a/net/bluetooth/cmtp/sock.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - CMTP implementation for Linux Bluetooth stack (BlueZ). - Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. 
-*/ - -#include <linux/export.h> - -#include <linux/types.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/poll.h> -#include <linux/fcntl.h> -#include <linux/skbuff.h> -#include <linux/socket.h> -#include <linux/ioctl.h> -#include <linux/file.h> -#include <linux/compat.h> -#include <linux/gfp.h> -#include <linux/uaccess.h> -#include <net/sock.h> - -#include <linux/isdn/capilli.h> - - -#include "cmtp.h" - -static struct bt_sock_list cmtp_sk_list = { - .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock) -}; - -static int cmtp_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - BT_DBG("sock %p sk %p", sock, sk); - - if (!sk) - return 0; - - bt_sock_unlink(&cmtp_sk_list, sk); - - sock_orphan(sk); - sock_put(sk); - - return 0; -} - -static int do_cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp) -{ - struct cmtp_connadd_req ca; - struct cmtp_conndel_req cd; - struct cmtp_connlist_req cl; - struct cmtp_conninfo ci; - struct socket *nsock; - int err; - - BT_DBG("cmd %x arg %p", cmd, argp); - - switch (cmd) { - case CMTPCONNADD: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&ca, argp, sizeof(ca))) - return -EFAULT; - - nsock = sockfd_lookup(ca.sock, &err); - if (!nsock) - return err; - - if (nsock->sk->sk_state != BT_CONNECTED) { - sockfd_put(nsock); - return -EBADFD; - } - - err = cmtp_add_connection(&ca, nsock); - if (!err) { - if (copy_to_user(argp, &ca, sizeof(ca))) - err = -EFAULT; - } else - sockfd_put(nsock); - - return err; - - case CMTPCONNDEL: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&cd, argp, sizeof(cd))) - return -EFAULT; - - return cmtp_del_connection(&cd); - - case CMTPGETCONNLIST: - if (copy_from_user(&cl, argp, sizeof(cl))) - return -EFAULT; - - if (cl.cnum <= 0) - return -EINVAL; - - err = cmtp_get_connlist(&cl); - if (!err && copy_to_user(argp, &cl, sizeof(cl))) - return -EFAULT; - - return err; - - case 
CMTPGETCONNINFO: - if (copy_from_user(&ci, argp, sizeof(ci))) - return -EFAULT; - - err = cmtp_get_conninfo(&ci); - if (!err && copy_to_user(argp, &ci, sizeof(ci))) - return -EFAULT; - - return err; - } - - return -EINVAL; -} - -static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - return do_cmtp_sock_ioctl(sock, cmd, (void __user *)arg); -} - -#ifdef CONFIG_COMPAT -static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - void __user *argp = compat_ptr(arg); - if (cmd == CMTPGETCONNLIST) { - struct cmtp_connlist_req cl; - u32 __user *p = argp; - u32 uci; - int err; - - if (get_user(cl.cnum, p) || get_user(uci, p + 1)) - return -EFAULT; - - cl.ci = compat_ptr(uci); - - if (cl.cnum <= 0) - return -EINVAL; - - err = cmtp_get_connlist(&cl); - - if (!err && put_user(cl.cnum, p)) - err = -EFAULT; - - return err; - } - - return do_cmtp_sock_ioctl(sock, cmd, argp); -} -#endif - -static const struct proto_ops cmtp_sock_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .release = cmtp_sock_release, - .ioctl = cmtp_sock_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = cmtp_sock_compat_ioctl, -#endif - .bind = sock_no_bind, - .getname = sock_no_getname, - .sendmsg = sock_no_sendmsg, - .recvmsg = sock_no_recvmsg, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .connect = sock_no_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .mmap = sock_no_mmap -}; - -static struct proto cmtp_proto = { - .name = "CMTP", - .owner = THIS_MODULE, - .obj_size = sizeof(struct bt_sock) -}; - -static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - BT_DBG("sock %p", sock); - - if (sock->type != SOCK_RAW) - return -ESOCKTNOSUPPORT; - - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern); - if (!sk) - return -ENOMEM; - - sock_init_data(sock, sk); - - sock->ops = &cmtp_sock_ops; - - sock->state = 
SS_UNCONNECTED; - - sock_reset_flag(sk, SOCK_ZAPPED); - - sk->sk_protocol = protocol; - sk->sk_state = BT_OPEN; - - bt_sock_link(&cmtp_sk_list, sk); - - return 0; -} - -static const struct net_proto_family cmtp_sock_family_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .create = cmtp_sock_create -}; - -int cmtp_init_sockets(void) -{ - int err; - - err = proto_register(&cmtp_proto, 0); - if (err < 0) - return err; - - err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops); - if (err < 0) { - BT_ERR("Can't register CMTP socket"); - goto error; - } - - err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL); - if (err < 0) { - BT_ERR("Failed to create CMTP proc file"); - bt_sock_unregister(BTPROTO_HIDP); - goto error; - } - - BT_INFO("CMTP socket layer initialized"); - - return 0; - -error: - proto_unregister(&cmtp_proto); - return err; -} - -void cmtp_cleanup_sockets(void) -{ - bt_procfs_cleanup(&init_net, "cmtp"); - bt_sock_unregister(BTPROTO_CMTP); - proto_unregister(&cmtp_proto); -} diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3a0592599086..17b46ad6a349 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -480,40 +480,107 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return hci_setup_sync_conn(conn, handle); } -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, - u16 to_multiplier) +struct le_conn_update_data { + struct hci_conn *conn; + u16 min; + u16 max; + u16 latency; + u16 to_multiplier; +}; + +static int le_conn_update_sync(struct hci_dev *hdev, void *data) { - struct hci_dev *hdev = conn->hdev; + struct le_conn_update_data *d = data; + struct hci_conn *conn = d->conn; struct hci_conn_params *params; struct hci_cp_le_conn_update cp; + u16 timeout; + u8 store_hint; + int err; + /* Verify connection is still alive and read conn fields under + * the same lock to prevent a concurrent disconnect from freeing + * or reusing the connection while we build the 
HCI command. + */ hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - params->conn_min_interval = min; - params->conn_max_interval = max; - params->conn_latency = latency; - params->supervision_timeout = to_multiplier; + if (!hci_conn_valid(hdev, conn)) { + hci_dev_unlock(hdev); + return -ECANCELED; } - hci_dev_unlock(hdev); - memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); - cp.conn_interval_min = cpu_to_le16(min); - cp.conn_interval_max = cpu_to_le16(max); - cp.conn_latency = cpu_to_le16(latency); - cp.supervision_timeout = cpu_to_le16(to_multiplier); + cp.conn_interval_min = cpu_to_le16(d->min); + cp.conn_interval_max = cpu_to_le16(d->max); + cp.conn_latency = cpu_to_le16(d->latency); + cp.supervision_timeout = cpu_to_le16(d->to_multiplier); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); + timeout = conn->conn_timeout; + + hci_dev_unlock(hdev); - hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, + HCI_EV_LE_CONN_UPDATE_COMPLETE, + timeout, NULL); + if (err) + return err; + + /* Update stored connection parameters after the controller has + * confirmed the update via the LE Connection Update Complete event. 
+ */ + hci_dev_lock(hdev); + + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) { + params->conn_min_interval = d->min; + params->conn_max_interval = d->max; + params->conn_latency = d->latency; + params->supervision_timeout = d->to_multiplier; + store_hint = 0x01; + } else { + store_hint = 0x00; + } - if (params) - return 0x01; + hci_dev_unlock(hdev); - return 0x00; + mgmt_new_conn_param(hdev, &conn->dst, conn->dst_type, store_hint, + d->min, d->max, d->latency, d->to_multiplier); + + return 0; +} + +static void le_conn_update_complete(struct hci_dev *hdev, void *data, int err) +{ + struct le_conn_update_data *d = data; + + hci_conn_put(d->conn); + kfree(d); +} + +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + struct le_conn_update_data *d; + + d = kzalloc_obj(*d); + if (!d) + return; + + hci_conn_get(conn); + d->conn = conn; + d->min = min; + d->max = max; + d->latency = latency; + d->to_multiplier = to_multiplier; + + if (hci_cmd_sync_queue(conn->hdev, le_conn_update_sync, d, + le_conn_update_complete) < 0) { + hci_conn_put(conn); + kfree(d); + } } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, @@ -2130,6 +2197,9 @@ static int create_big_sync(struct hci_dev *hdev, void *data) u32 flags = 0; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + if (qos->bcast.out.phys == BIT(1)) flags |= MGMT_ADV_FLAG_SEC_2M; @@ -2204,11 +2274,24 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "conn %p", conn); + if (err == -ECANCELED) + goto done; + + hci_dev_lock(hdev); + + if (!hci_conn_valid(hdev, conn)) + goto unlock; + if (err) { bt_dev_err(hdev, "Unable to create BIG: %d", err); hci_connect_cfm(conn, err); hci_conn_del(conn); } + +unlock: + hci_dev_unlock(hdev); +done: + hci_conn_put(conn); } struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, @@ -2336,10 +2419,11 @@ struct 
hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ - err = hci_cmd_sync_queue(hdev, create_big_sync, conn, + err = hci_cmd_sync_queue(hdev, create_big_sync, hci_conn_get(conn), create_big_complete); if (err < 0) { hci_conn_drop(conn); + hci_conn_put(conn); return ERR_PTR(err); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b2ee6b6a0f56..eea2f810aafa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7118,9 +7118,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, continue; } + if (ev->num_bis <= i) { + bt_dev_err(hdev, + "Not enough BIS handles for BIG 0x%2.2x", + ev->handle); + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + continue; + } + if (hci_conn_set_handle(conn, - __le16_to_cpu(ev->bis_handle[i++]))) + __le16_to_cpu(ev->bis_handle[i++]))) { + bt_dev_err(hdev, + "Failed to set BIS handle for BIG 0x%2.2x", + ev->handle); + /* Force error so BIG gets terminated as not all BIS + * could be connected. + */ + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); continue; + } conn->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_CREATED, &conn->flags); @@ -7129,7 +7149,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, hci_iso_setup_path(conn); } - if (!ev->status && !i) + /* If there is an unexpected error or if no BISes have been connected + * for the BIG, terminate it. + */ + if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i)) /* If no BISes have been connected for the BIG, * terminate. 
This is in case all bound connections * have been closed before the BIG creation @@ -7168,7 +7191,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); conn->num_bis = 0; - memset(conn->bis, 0, sizeof(conn->num_bis)); + memset(conn->bis, 0, sizeof(conn->bis)); for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7bcf8c5ceaee..976f91eeb745 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1036,6 +1036,28 @@ static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr) } /* + * Consume session->conn: clear the member under hidp_session_sem, then + * l2cap_unregister_user() and l2cap_conn_put() the snapshot outside the + * sem. At most one caller wins; later callers see NULL and skip. The + * reference is the one hidp_session_new() took via l2cap_conn_get(). + */ +static void hidp_session_unregister_conn(struct hidp_session *session) +{ + struct l2cap_conn *conn; + + down_write(&hidp_session_sem); + conn = session->conn; + if (conn) + session->conn = NULL; + up_write(&hidp_session_sem); + + if (conn) { + l2cap_unregister_user(conn, &session->user); + l2cap_conn_put(conn); + } +} + +/* * Start session synchronously * This starts a session thread and waits until initialization * is done or returns an error if it couldn't be started. @@ -1311,8 +1333,7 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. 
*/ - if (session->conn) - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); @@ -1418,7 +1439,7 @@ int hidp_connection_del(struct hidp_conndel_req *req) HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0); else - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index be145e2736b7..7cb2864fe872 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -347,6 +347,7 @@ static int iso_connect_bis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!bis_capable(hdev)) { err = -EOPNOTSUPP; @@ -399,13 +400,9 @@ static int iso_connect_bis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -421,9 +418,8 @@ static int iso_connect_bis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -444,6 +440,7 @@ static int iso_connect_cis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!cis_central_capable(hdev)) { err = -EOPNOTSUPP; @@ -498,13 +495,9 @@ static int iso_connect_cis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -520,9 +513,8 @@ static int iso_connect_cis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1193,7 +1185,7 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, release_sock(sk); - if 
(bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + if (bacmp(&sa->iso_bdaddr, BDADDR_ANY)) err = iso_connect_cis(sk); else err = iso_connect_bis(sk); @@ -2256,8 +2248,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); if (sk && !ev1->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); iso_pi(sk)->bc_sid = ev1->sid; + release_sock(sk); } goto done; @@ -2268,8 +2262,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid_past, ev1a); if (sk && !ev1a->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1a->sync_handle); iso_pi(sk)->bc_sid = ev1a->sid; + release_sock(sk); } goto done; @@ -2296,27 +2292,35 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2); if (sk) { - int err; - struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + int err = 0; + bool big_sync; + struct hci_conn *hcon; + lock_sock(sk); + + hcon = iso_pi(sk)->conn->hcon; iso_pi(sk)->qos.bcast.encryption = ev2->encryption; if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; - if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && - !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { + big_sync = !test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && + !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags); + + if (big_sync) err = hci_conn_big_create_sync(hdev, hcon, &iso_pi(sk)->qos, iso_pi(sk)->sync_handle, iso_pi(sk)->bc_num_bis, iso_pi(sk)->bc_bis); - if (err) { - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); - sock_put(sk); - sk = NULL; - } + + release_sock(sk); + + if (big_sync && err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); + sock_put(sk); + sk = NULL; } } @@ -2370,8 +2374,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!base || base_len > 
BASE_MAX_LENGTH) goto done; + lock_sock(sk); memcpy(iso_pi(sk)->base, base, base_len); iso_pi(sk)->base_len = base_len; + release_sock(sk); } else { /* This is a PA data fragment. Keep pa_data_len set to 0 * until all data has been reassembled. diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 77dec104a9c3..7701528f1167 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4706,16 +4706,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); - if (!err) { - u8 store_hint; - - store_hint = hci_le_conn_update(hcon, min, max, latency, - to_multiplier); - mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, - store_hint, min, max, latency, - to_multiplier); - - } + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); return 0; } @@ -5428,7 +5420,7 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, * configured, the MPS field may be less than the current MPS * of that channel. 
*/ - if (chan[i]->remote_mps >= mps && i) { + if (chan[i]->remote_mps > mps && num_scid > 1) { BT_ERR("chan %p decreased MPS %u -> %u", chan[i], chan[i]->remote_mps, mps); result = L2CAP_RECONF_INVALID_MPS; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 71e8c1b45bce..cf590a67d364 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1498,6 +1498,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; + if (!parent) + return NULL; + lock_sock(parent); /* Check for backlog size */ @@ -1657,6 +1660,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, { struct sock *sk = chan->data; + if (!sk) + return; + sk->sk_state = state; if (err) @@ -1758,6 +1764,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return 0; + return READ_ONCE(sk->sk_sndtimeo); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 611a9a94151e..d11bd5337d57 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1715,9 +1715,12 @@ static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk } if (pf && d->cfc) { - u8 credits = *(u8 *) skb->data; skb_pull(skb, 1); + u8 *credits = skb_pull_data(skb, 1); - d->tx_credits += credits; + if (!credits) + goto drop; + + d->tx_credits += *credits; if (d->tx_credits) clear_bit(RFCOMM_TX_THROTTLED, &d->flags); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 18826d4b9c0b..eba44525d41d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -472,9 +472,13 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) sk1 = sk; } + sk = sk ? sk : sk1; + if (sk) + sock_hold(sk); + read_unlock(&sco_sk_list.lock); - return sk ? 
sk : sk1; + return sk; } static void sco_sock_destruct(struct sock *sk) @@ -515,11 +519,13 @@ static void sco_sock_kill(struct sock *sk) BT_DBG("sk %p state %d", sk, sk->sk_state); /* Sock is dead, so set conn->sk to NULL to avoid possible UAF */ + lock_sock(sk); if (sco_pi(sk)->conn) { sco_conn_lock(sco_pi(sk)->conn); sco_pi(sk)->conn->sk = NULL; sco_conn_unlock(sco_pi(sk)->conn); } + release_sock(sk); /* Kill poor orphan */ bt_sock_unlink(&sco_sk_list, sk); @@ -1365,40 +1371,51 @@ static int sco_sock_release(struct socket *sock) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent; - struct sock *sk = conn->sk; + struct sock *parent, *sk; + + sco_conn_lock(conn); + sk = sco_sock_hold(conn); + sco_conn_unlock(conn); BT_DBG("conn %p", conn); if (sk) { lock_sock(sk); - sco_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); + + /* conn->sk may have become NULL if racing with sk close, but + * due to held hdev->lock, it can't become different sk. 
+ */ + if (conn->sk) { + sco_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } + release_sock(sk); + sock_put(sk); } else { - sco_conn_lock(conn); - - if (!conn->hcon) { - sco_conn_unlock(conn); + if (!conn->hcon) return; - } + + lockdep_assert_held(&conn->hcon->hdev->lock); parent = sco_get_sock_listen(&conn->hcon->src); - if (!parent) { - sco_conn_unlock(conn); + if (!parent) return; - } lock_sock(parent); + sco_conn_lock(conn); + + /* hdev->lock guarantees conn->sk == NULL still here */ + + if (parent->sk_state != BT_LISTEN) + goto release; + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); - if (!sk) { - release_sock(parent); - sco_conn_unlock(conn); - return; - } + if (!sk) + goto release; sco_sock_init(sk, parent); @@ -1417,9 +1434,10 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - release_sock(parent); - +release: sco_conn_unlock(conn); + release_sock(parent); + sock_put(parent); } } diff --git a/net/bridge/br.c b/net/bridge/br.c index c37e52e2f29a..a5e5b2db110e 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -464,10 +464,6 @@ static int __init br_init(void) brioctl_set(br_ioctl_stub); -#if IS_ENABLED(CONFIG_ATM_LANE) - br_fdb_test_addr_hook = br_fdb_test_addr; -#endif - #if IS_MODULE(CONFIG_BRIDGE_NETFILTER) pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " "by default. 
Update your scripts to load br_netfilter if you " @@ -506,9 +502,6 @@ static void __exit br_deinit(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_nf_core_fini(); -#if IS_ENABLED(CONFIG_ATM_LANE) - br_fdb_test_addr_hook = NULL; -#endif br_fdb_fini(); } diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 0c8a06cdd46f..deb1ab1f24b0 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -201,11 +201,12 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, f = br_fdb_find_rcu(br, n->ha, vid); if (f) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); bool replied = false; if ((p && (p->flags & BR_PROXYARP)) || - (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) || - br_is_neigh_suppress_enabled(f->dst, vid)) { + (dst && (dst->flags & BR_PROXYARP_WIFI)) || + br_is_neigh_suppress_enabled(dst, vid)) { if (!vid) br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha, 0, 0); @@ -469,9 +470,10 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, f = br_fdb_find_rcu(br, n->ha, vid); if (f) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); bool replied = false; - if (br_is_neigh_suppress_enabled(f->dst, vid)) { + if (br_is_neigh_suppress_enabled(dst, vid)) { if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e2c17f620f00..ac81e58d5f70 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -236,6 +236,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, const unsigned char *addr, __u16 vid) { + const struct net_bridge_port *dst; struct net_bridge_fdb_entry *f; struct net_device *dev = NULL; struct net_bridge *br; @@ -248,8 +249,11 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, br = netdev_priv(br_dev); rcu_read_lock(); f = br_fdb_find_rcu(br, addr, vid); - if (f && f->dst) - dev = f->dst->dev; + if (f) { + dst = READ_ONCE(f->dst); + if 
(dst) + dev = dst->dev; + } rcu_read_unlock(); return dev; @@ -346,7 +350,7 @@ static void fdb_delete_local(struct net_bridge *br, vg = nbp_vlan_group(op); if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && (!vid || br_vlan_find(vg, vid))) { - f->dst = op; + WRITE_ONCE(f->dst, op); clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } @@ -357,7 +361,7 @@ static void fdb_delete_local(struct net_bridge *br, /* Maybe bridge device has same hw addr? */ if (p && ether_addr_equal(br->dev->dev_addr, addr) && (!vid || (v && br_vlan_should_use(v)))) { - f->dst = NULL; + WRITE_ONCE(f->dst, NULL); clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } @@ -892,35 +896,6 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -#if IS_ENABLED(CONFIG_ATM_LANE) -/* Interface used by ATM LANE hook to test - * if an addr is on some other bridge port */ -int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) -{ - struct net_bridge_fdb_entry *fdb; - struct net_bridge_port *port; - int ret; - - rcu_read_lock(); - port = br_port_get_rcu(dev); - if (!port) - ret = 0; - else { - const struct net_bridge_port *dst = NULL; - - fdb = br_fdb_find_rcu(port->br, addr, 0); - if (fdb) - dst = READ_ONCE(fdb->dst); - - ret = dst && dst->dev != dev && - dst->state == BR_STATE_FORWARDING; - } - rcu_read_unlock(); - - return ret; -} -#endif /* CONFIG_ATM_LANE */ - /* * Fill buffer with forwarding table records in * the API format. 
@@ -928,6 +903,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long maxnum, unsigned long skip) { + const struct net_bridge_port *dst; struct net_bridge_fdb_entry *f; struct __fdb_entry *fe = buf; unsigned long delta; @@ -944,7 +920,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, continue; /* ignore pseudo entry for local MAC address */ - if (!f->dst) + dst = READ_ONCE(f->dst); + if (!dst) continue; if (skip) { @@ -956,8 +933,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); /* due to ABI compat need to split into hi/lo */ - fe->port_no = f->dst->port_no; - fe->port_hi = f->dst->port_no >> 8; + fe->port_no = dst->port_no; + fe->port_hi = dst->port_no >> 8; fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags); if (!test_bit(BR_FDB_STATIC, &f->flags)) { @@ -1083,9 +1060,11 @@ int br_fdb_dump(struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); + if (*idx < ctx->fdb_idx) goto skip; - if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev && (!dst || dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; /* !f->dst is a special case for bridge @@ -1093,10 +1072,10 @@ int br_fdb_dump(struct sk_buff *skb, * Therefore need a little more filtering * we only want to dump the !f->dst case */ - if (f->dst) + if (dst) goto skip; } - if (!filter_dev && f->dst) + if (!filter_dev && dst) goto skip; err = fdb_fill_info(skb, br, f, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 361a9b84451e..bed1b1d9b282 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -855,7 +855,6 @@ void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, const unsigned char *addr, __u16 vid); -int br_fdb_test_addr(struct net_device *dev, 
unsigned char *addr); int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source, @@ -2065,9 +2064,6 @@ void br_stp_port_timer_init(struct net_bridge_port *p); unsigned long br_timer_value(const struct timer_list *timer); /* br.c */ -#if IS_ENABLED(CONFIG_ATM_LANE) -extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr); -#endif /* br_mrp.c */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 741360219552..f05c79f215ea 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -112,24 +112,22 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret = ebt_register_template(&broute_table, broute_table_init); + int ret = register_pernet_subsys(&broute_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&broute_net_ops); - if (ret) { - ebt_unregister_template(&broute_table); - return ret; - } + ret = ebt_register_template(&broute_table, broute_table_init); + if (ret) + unregister_pernet_subsys(&broute_net_ops); - return 0; + return ret; } static void __exit ebtable_broute_fini(void) { - unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index dacd81b12e62..0fc03b07e62a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret = ebt_register_template(&frame_filter, frame_filter_table_init); + int ret = register_pernet_subsys(&frame_filter_net_ops); if (ret) return ret; - ret = 
register_pernet_subsys(&frame_filter_net_ops); - if (ret) { - ebt_unregister_template(&frame_filter); - return ret; - } + ret = ebt_register_template(&frame_filter, frame_filter_table_init); + if (ret) + unregister_pernet_subsys(&frame_filter_net_ops); - return 0; + return ret; } static void __exit ebtable_filter_fini(void) { - unregister_pernet_subsys(&frame_filter_net_ops); ebt_unregister_template(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0f2a8c6118d4..8a10375d8909 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret = ebt_register_template(&frame_nat, frame_nat_table_init); + int ret = register_pernet_subsys(&frame_nat_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret) { - ebt_unregister_template(&frame_nat); - return ret; - } + ret = ebt_register_template(&frame_nat, frame_nat_table_init); + if (ret) + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } static void __exit ebtable_nat_fini(void) { - unregister_pernet_subsys(&frame_nat_net_ops); ebt_unregister_template(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aea3e19875c6..b9f4daac09af 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -42,6 +42,7 @@ struct ebt_pernet { struct list_head tables; + struct list_head dead_tables; }; struct ebt_template { @@ -1162,11 +1163,6 @@ free_newinfo: static void __ebt_unregister_table(struct net *net, struct ebt_table *table) { - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - audit_log_nfcfg(table->name, AF_BRIDGE, 
table->private->nentries, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) @@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, for (i = 0; i < num_ops; i++) ops[i].priv = table; - list_add(&table->list, &ebt_net->tables); - mutex_unlock(&ebt_mutex); - table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); - if (ret) + if (ret) { + synchronize_rcu(); __ebt_unregister_table(net, table); + } else { + list_add(&table->list, &ebt_net->tables); + } + mutex_unlock(&ebt_mutex); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REGISTER, GFP_KERNEL); @@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t) } EXPORT_SYMBOL(ebt_unregister_template); -static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +void ebt_unregister_table_pre_exit(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table *t; @@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name) list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, name) == 0) { + list_move(&t->list, &ebt_net->dead_tables); mutex_unlock(&ebt_mutex); - return t; + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; } } mutex_unlock(&ebt_mutex); - return NULL; -} - -void ebt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct ebt_table *table = __ebt_find_table(net, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(ebt_unregister_table_pre_exit); void ebt_unregister_table(struct net *net, const char *name) { - struct ebt_table *table = __ebt_find_table(net, name); + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_table *t; - if (table) - 
__ebt_unregister_table(net, table); + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &ebt_net->dead_tables, list) { + if (strcmp(t->name, name) == 0) { + list_del(&t->list); + audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + __ebt_unregister_table(net, t); + mutex_unlock(&ebt_mutex); + return; + } + } + + mutex_unlock(&ebt_mutex); } /* userspace just supplied us with counters */ @@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net) struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); INIT_LIST_HEAD(&ebt_net->tables); + INIT_LIST_HEAD(&ebt_net->dead_tables); return 0; } +static void __net_exit ebt_pernet_exit(struct net *net) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + WARN_ON_ONCE(!list_empty(&ebt_net->tables)); + WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables)); +} + static struct pernet_operations ebt_net_ops = { .init = ebt_pernet_init, + .exit = ebt_pernet_exit, .id = &ebt_pernet_id, .size = sizeof(struct ebt_pernet), }; @@ -2569,19 +2583,20 @@ static int __init ebtables_init(void) { int ret; - ret = xt_register_target(&ebt_standard_target); + ret = register_pernet_subsys(&ebt_net_ops); if (ret < 0) return ret; - ret = nf_register_sockopt(&ebt_sockopts); + + ret = xt_register_target(&ebt_standard_target); if (ret < 0) { - xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } - ret = register_pernet_subsys(&ebt_net_ops); + ret = nf_register_sockopt(&ebt_sockopts); if (ret < 0) { - nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } diff --git a/net/caif/Kconfig b/net/caif/Kconfig deleted file mode 100644 index 87205251cc25..000000000000 --- a/net/caif/Kconfig +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# CAIF net configurations -# - -menuconfig CAIF - tristate "CAIF support" - select 
CRC_CCITT - default n - help - The "Communication CPU to Application CPU Interface" (CAIF) is a packet - based connection-oriented MUX protocol developed by ST-Ericsson for use - with its modems. It is accessed from user space as sockets (PF_CAIF). - - Say Y (or M) here if you build for a phone product (e.g. Android or - MeeGo) that uses CAIF as transport. If unsure say N. - - If you select to build it as module then CAIF_NETDEV also needs to be - built as a module. You will also need to say Y (or M) to any CAIF - physical devices that your platform requires. - - See Documentation/networking/caif for a further explanation on how to - use and configure CAIF. - -config CAIF_DEBUG - bool "Enable Debug" - depends on CAIF - default n - help - Enable the inclusion of debug code in the CAIF stack. - Be aware that doing this will impact performance. - If unsure say N. - -config CAIF_NETDEV - tristate "CAIF GPRS Network device" - depends on CAIF - default CAIF - help - Say Y if you will be using a CAIF based GPRS network device. - This can be either built-in or a loadable module. - If you select to build it as a built-in then the main CAIF device must - also be a built-in. - If unsure say Y. - -config CAIF_USB - tristate "CAIF USB support" - depends on CAIF - default n - help - Say Y if you are using CAIF over USB CDC NCM. - This can be either built-in or a loadable module. - If you select to build it as a built-in then the main CAIF device must - also be a built-in. - If unsure say N. 
diff --git a/net/caif/Makefile b/net/caif/Makefile deleted file mode 100644 index 4f6c0517cdfb..000000000000 --- a/net/caif/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG - -caif-y := caif_dev.o \ - cfcnfg.o cfmuxl.o cfctrl.o \ - cffrml.o cfveil.o cfdbgl.o\ - cfserl.o cfdgml.o \ - cfrfml.o cfvidl.o cfutill.o \ - cfsrvl.o cfpkt_skbuff.o - -obj-$(CONFIG_CAIF) += caif.o -obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o -obj-$(CONFIG_CAIF) += caif_socket.o -obj-$(CONFIG_CAIF_USB) += caif_usb.o - -export-y := caif.o diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c deleted file mode 100644 index 922de3d611c0..000000000000 --- a/net/caif/caif_dev.c +++ /dev/null @@ -1,586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * CAIF Interface registration. - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - * - * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont - * and Sakari Ailus <sakari.ailus@nokia.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/if_arp.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/mutex.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <net/netns/generic.h> -#include <net/net_namespace.h> -#include <net/pkt_sched.h> -#include <net/caif/caif_device.h> -#include <net/caif/caif_layer.h> -#include <net/caif/caif_dev.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfcnfg.h> -#include <net/caif/cfserl.h> - -MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol support"); -MODULE_LICENSE("GPL"); - -/* Used for local tracking of the CAIF net devices */ -struct caif_device_entry { - struct cflayer layer; - struct list_head list; - struct net_device *netdev; - int __percpu *pcpu_refcnt; - spinlock_t flow_lock; - struct sk_buff *xoff_skb; - void (*xoff_skb_dtor)(struct sk_buff *skb); - bool xoff; -}; - -struct caif_device_entry_list { - struct 
list_head list; - /* Protects simulanous deletes in list */ - struct mutex lock; -}; - -struct caif_net { - struct cfcnfg *cfg; - struct caif_device_entry_list caifdevs; -}; - -static unsigned int caif_net_id; -static int q_high = 50; /* Percent */ - -struct cfcnfg *get_cfcnfg(struct net *net) -{ - struct caif_net *caifn; - caifn = net_generic(net, caif_net_id); - return caifn->cfg; -} -EXPORT_SYMBOL(get_cfcnfg); - -static struct caif_device_entry_list *caif_device_list(struct net *net) -{ - struct caif_net *caifn; - caifn = net_generic(net, caif_net_id); - return &caifn->caifdevs; -} - -static void caifd_put(struct caif_device_entry *e) -{ - this_cpu_dec(*e->pcpu_refcnt); -} - -static void caifd_hold(struct caif_device_entry *e) -{ - this_cpu_inc(*e->pcpu_refcnt); -} - -static int caifd_refcnt_read(struct caif_device_entry *e) -{ - int i, refcnt = 0; - for_each_possible_cpu(i) - refcnt += *per_cpu_ptr(e->pcpu_refcnt, i); - return refcnt; -} - -/* Allocate new CAIF device. */ -static struct caif_device_entry *caif_device_alloc(struct net_device *dev) -{ - struct caif_device_entry *caifd; - - caifd = kzalloc_obj(*caifd); - if (!caifd) - return NULL; - caifd->pcpu_refcnt = alloc_percpu(int); - if (!caifd->pcpu_refcnt) { - kfree(caifd); - return NULL; - } - caifd->netdev = dev; - dev_hold(dev); - return caifd; -} - -static struct caif_device_entry *caif_get(struct net_device *dev) -{ - struct caif_device_entry_list *caifdevs = - caif_device_list(dev_net(dev)); - struct caif_device_entry *caifd; - - list_for_each_entry_rcu(caifd, &caifdevs->list, list, - lockdep_rtnl_is_held()) { - if (caifd->netdev == dev) - return caifd; - } - return NULL; -} - -static void caif_flow_cb(struct sk_buff *skb) -{ - struct caif_device_entry *caifd; - void (*dtor)(struct sk_buff *skb) = NULL; - bool send_xoff; - - WARN_ON(skb->dev == NULL); - - rcu_read_lock(); - caifd = caif_get(skb->dev); - - WARN_ON(caifd == NULL); - if (!caifd) { - rcu_read_unlock(); - return; - } - - 
caifd_hold(caifd); - rcu_read_unlock(); - - spin_lock_bh(&caifd->flow_lock); - send_xoff = caifd->xoff; - caifd->xoff = false; - dtor = caifd->xoff_skb_dtor; - - if (WARN_ON(caifd->xoff_skb != skb)) - skb = NULL; - - caifd->xoff_skb = NULL; - caifd->xoff_skb_dtor = NULL; - - spin_unlock_bh(&caifd->flow_lock); - - if (dtor && skb) - dtor(skb); - - if (send_xoff) - caifd->layer.up-> - ctrlcmd(caifd->layer.up, - _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND, - caifd->layer.id); - caifd_put(caifd); -} - -static int transmit(struct cflayer *layer, struct cfpkt *pkt) -{ - int err, high = 0, qlen = 0; - struct caif_device_entry *caifd = - container_of(layer, struct caif_device_entry, layer); - struct sk_buff *skb; - struct netdev_queue *txq; - - rcu_read_lock_bh(); - - skb = cfpkt_tonative(pkt); - skb->dev = caifd->netdev; - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_CAIF); - - /* Check if we need to handle xoff */ - if (likely(caifd->netdev->priv_flags & IFF_NO_QUEUE)) - goto noxoff; - - if (unlikely(caifd->xoff)) - goto noxoff; - - if (likely(!netif_queue_stopped(caifd->netdev))) { - struct Qdisc *sch; - - /* If we run with a TX queue, check if the queue is too long*/ - txq = netdev_get_tx_queue(skb->dev, 0); - sch = rcu_dereference_bh(txq->qdisc); - if (likely(qdisc_is_empty(sch))) - goto noxoff; - - /* can check for explicit qdisc len value only !NOLOCK, - * always set flow off otherwise - */ - high = (caifd->netdev->tx_queue_len * q_high) / 100; - if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high)) - goto noxoff; - } - - /* Hold lock while accessing xoff */ - spin_lock_bh(&caifd->flow_lock); - if (caifd->xoff) { - spin_unlock_bh(&caifd->flow_lock); - goto noxoff; - } - - /* - * Handle flow off, we do this by temporary hi-jacking this - * skb's destructor function, and replace it with our own - * flow-on callback. The callback will set flow-on and call - * the original destructor. 
- */ - - pr_debug("queue has stopped(%d) or is full (%d > %d)\n", - netif_queue_stopped(caifd->netdev), - qlen, high); - caifd->xoff = true; - caifd->xoff_skb = skb; - caifd->xoff_skb_dtor = skb->destructor; - skb->destructor = caif_flow_cb; - spin_unlock_bh(&caifd->flow_lock); - - caifd->layer.up->ctrlcmd(caifd->layer.up, - _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, - caifd->layer.id); -noxoff: - rcu_read_unlock_bh(); - - err = dev_queue_xmit(skb); - if (err > 0) - err = -EIO; - - return err; -} - -/* - * Stuff received packets into the CAIF stack. - * On error, returns non-zero and releases the skb. - */ -static int receive(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pkttype, struct net_device *orig_dev) -{ - struct cfpkt *pkt; - struct caif_device_entry *caifd; - int err; - - pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); - - rcu_read_lock(); - caifd = caif_get(dev); - - if (!caifd || !caifd->layer.up || !caifd->layer.up->receive || - !netif_oper_up(caifd->netdev)) { - rcu_read_unlock(); - kfree_skb(skb); - return NET_RX_DROP; - } - - /* Hold reference to netdevice while using CAIF stack */ - caifd_hold(caifd); - rcu_read_unlock(); - - err = caifd->layer.up->receive(caifd->layer.up, pkt); - - /* For -EILSEQ the packet is not freed so free it now */ - if (err == -EILSEQ) - cfpkt_destroy(pkt); - - /* Release reference to stack upwards */ - caifd_put(caifd); - - if (err != 0) - err = NET_RX_DROP; - return err; -} - -static struct packet_type caif_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_CAIF), - .func = receive, -}; - -static void dev_flowctrl(struct net_device *dev, int on) -{ - struct caif_device_entry *caifd; - - rcu_read_lock(); - - caifd = caif_get(dev); - if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) { - rcu_read_unlock(); - return; - } - - caifd_hold(caifd); - rcu_read_unlock(); - - caifd->layer.up->ctrlcmd(caifd->layer.up, - on ? 
- _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND : - _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, - caifd->layer.id); - caifd_put(caifd); -} - -int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, - struct cflayer *link_support, int head_room, - struct cflayer **layer, - int (**rcv_func)(struct sk_buff *, struct net_device *, - struct packet_type *, - struct net_device *)) -{ - struct caif_device_entry *caifd; - enum cfcnfg_phy_preference pref; - struct cfcnfg *cfg = get_cfcnfg(dev_net(dev)); - struct caif_device_entry_list *caifdevs; - int res; - - caifdevs = caif_device_list(dev_net(dev)); - caifd = caif_device_alloc(dev); - if (!caifd) - return -ENOMEM; - *layer = &caifd->layer; - spin_lock_init(&caifd->flow_lock); - - switch (caifdev->link_select) { - case CAIF_LINK_HIGH_BANDW: - pref = CFPHYPREF_HIGH_BW; - break; - case CAIF_LINK_LOW_LATENCY: - pref = CFPHYPREF_LOW_LAT; - break; - default: - pref = CFPHYPREF_HIGH_BW; - break; - } - mutex_lock(&caifdevs->lock); - list_add_rcu(&caifd->list, &caifdevs->list); - - strscpy(caifd->layer.name, dev->name, - sizeof(caifd->layer.name)); - caifd->layer.transmit = transmit; - res = cfcnfg_add_phy_layer(cfg, - dev, - &caifd->layer, - pref, - link_support, - caifdev->use_fcs, - head_room); - mutex_unlock(&caifdevs->lock); - if (rcv_func) - *rcv_func = receive; - return res; -} -EXPORT_SYMBOL(caif_enroll_dev); - -/* notify Caif of device events */ -static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct caif_device_entry *caifd = NULL; - struct caif_dev_common *caifdev; - struct cfcnfg *cfg; - struct cflayer *layer, *link_support; - int head_room = 0; - struct caif_device_entry_list *caifdevs; - int res; - - cfg = get_cfcnfg(dev_net(dev)); - caifdevs = caif_device_list(dev_net(dev)); - - caifd = caif_get(dev); - if (caifd == NULL && dev->type != ARPHRD_CAIF) - return 0; - - switch (what) { - case NETDEV_REGISTER: - if 
(caifd != NULL) - break; - - caifdev = netdev_priv(dev); - - link_support = NULL; - if (caifdev->use_frag) { - head_room = 1; - link_support = cfserl_create(dev->ifindex, - caifdev->use_stx); - if (!link_support) { - pr_warn("Out of memory\n"); - break; - } - } - res = caif_enroll_dev(dev, caifdev, link_support, head_room, - &layer, NULL); - if (res) - cfserl_release(link_support); - caifdev->flowctrl = dev_flowctrl; - break; - - case NETDEV_UP: - rcu_read_lock(); - - caifd = caif_get(dev); - if (caifd == NULL) { - rcu_read_unlock(); - break; - } - - caifd->xoff = false; - cfcnfg_set_phy_state(cfg, &caifd->layer, true); - rcu_read_unlock(); - - break; - - case NETDEV_DOWN: - rcu_read_lock(); - - caifd = caif_get(dev); - if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) { - rcu_read_unlock(); - return -EINVAL; - } - - cfcnfg_set_phy_state(cfg, &caifd->layer, false); - caifd_hold(caifd); - rcu_read_unlock(); - - caifd->layer.up->ctrlcmd(caifd->layer.up, - _CAIF_CTRLCMD_PHYIF_DOWN_IND, - caifd->layer.id); - - spin_lock_bh(&caifd->flow_lock); - - /* - * Replace our xoff-destructor with original destructor. - * We trust that skb->destructor *always* is called before - * the skb reference is invalid. The hijacked SKB destructor - * takes the flow_lock so manipulating the skb->destructor here - * should be safe. - */ - if (caifd->xoff_skb_dtor != NULL && caifd->xoff_skb != NULL) - caifd->xoff_skb->destructor = caifd->xoff_skb_dtor; - - caifd->xoff = false; - caifd->xoff_skb_dtor = NULL; - caifd->xoff_skb = NULL; - - spin_unlock_bh(&caifd->flow_lock); - caifd_put(caifd); - break; - - case NETDEV_UNREGISTER: - mutex_lock(&caifdevs->lock); - - caifd = caif_get(dev); - if (caifd == NULL) { - mutex_unlock(&caifdevs->lock); - break; - } - list_del_rcu(&caifd->list); - - /* - * NETDEV_UNREGISTER is called repeatedly until all reference - * counts for the net-device are released. 
If references to - * caifd is taken, simply ignore NETDEV_UNREGISTER and wait for - * the next call to NETDEV_UNREGISTER. - * - * If any packets are in flight down the CAIF Stack, - * cfcnfg_del_phy_layer will return nonzero. - * If no packets are in flight, the CAIF Stack associated - * with the net-device un-registering is freed. - */ - - if (caifd_refcnt_read(caifd) != 0 || - cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0) { - - pr_info("Wait for device inuse\n"); - /* Enrole device if CAIF Stack is still in use */ - list_add_rcu(&caifd->list, &caifdevs->list); - mutex_unlock(&caifdevs->lock); - break; - } - - synchronize_rcu(); - dev_put(caifd->netdev); - free_percpu(caifd->pcpu_refcnt); - kfree(caifd); - - mutex_unlock(&caifdevs->lock); - break; - } - return 0; -} - -static struct notifier_block caif_device_notifier = { - .notifier_call = caif_device_notify, - .priority = 0, -}; - -/* Per-namespace Caif devices handling */ -static int caif_init_net(struct net *net) -{ - struct caif_net *caifn = net_generic(net, caif_net_id); - INIT_LIST_HEAD(&caifn->caifdevs.list); - mutex_init(&caifn->caifdevs.lock); - - caifn->cfg = cfcnfg_create(); - if (!caifn->cfg) - return -ENOMEM; - - return 0; -} - -static void caif_exit_net(struct net *net) -{ - struct caif_device_entry *caifd, *tmp; - struct caif_device_entry_list *caifdevs = - caif_device_list(net); - struct cfcnfg *cfg = get_cfcnfg(net); - - rtnl_lock(); - mutex_lock(&caifdevs->lock); - - list_for_each_entry_safe(caifd, tmp, &caifdevs->list, list) { - int i = 0; - list_del_rcu(&caifd->list); - cfcnfg_set_phy_state(cfg, &caifd->layer, false); - - while (i < 10 && - (caifd_refcnt_read(caifd) != 0 || - cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0)) { - - pr_info("Wait for device inuse\n"); - msleep(250); - i++; - } - synchronize_rcu(); - dev_put(caifd->netdev); - free_percpu(caifd->pcpu_refcnt); - kfree(caifd); - } - cfcnfg_remove(cfg); - - mutex_unlock(&caifdevs->lock); - rtnl_unlock(); -} - -static struct 
pernet_operations caif_net_ops = { - .init = caif_init_net, - .exit = caif_exit_net, - .id = &caif_net_id, - .size = sizeof(struct caif_net), -}; - -/* Initialize Caif devices list */ -static int __init caif_device_init(void) -{ - int result; - - result = register_pernet_subsys(&caif_net_ops); - - if (result) - return result; - - register_netdevice_notifier(&caif_device_notifier); - dev_add_pack(&caif_packet_type); - - return result; -} - -static void __exit caif_device_exit(void) -{ - unregister_netdevice_notifier(&caif_device_notifier); - dev_remove_pack(&caif_packet_type); - unregister_pernet_subsys(&caif_net_ops); -} - -module_init(caif_device_init); -module_exit(caif_device_exit); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c deleted file mode 100644 index af218742af5a..000000000000 --- a/net/caif/caif_socket.c +++ /dev/null @@ -1,1114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/filter.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/sched/signal.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/wait.h> -#include <linux/poll.h> -#include <linux/tcp.h> -#include <linux/uaccess.h> -#include <linux/debugfs.h> -#include <linux/caif/caif_socket.h> -#include <linux/pkt_sched.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/caif/caif_layer.h> -#include <net/caif/caif_dev.h> -#include <net/caif/cfpkt.h> - -MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol socket support (AF_CAIF)"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(AF_CAIF); - -/* - * CAIF state is re-using the TCP socket states. - * caif_states stored in sk_state reflect the state as reported by - * the CAIF stack, while sk_socket->state is the state of the socket. 
- */ -enum caif_states { - CAIF_CONNECTED = TCP_ESTABLISHED, - CAIF_CONNECTING = TCP_SYN_SENT, - CAIF_DISCONNECTED = TCP_CLOSE -}; - -#define TX_FLOW_ON_BIT 1 -#define RX_FLOW_ON_BIT 2 - -struct caifsock { - struct sock sk; /* must be first member */ - struct cflayer layer; - unsigned long flow_state; - struct caif_connect_request conn_req; - struct mutex readlock; - struct dentry *debugfs_socket_dir; - int headroom, tailroom, maxframe; -}; - -static int rx_flow_is_on(struct caifsock *cf_sk) -{ - return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static int tx_flow_is_on(struct caifsock *cf_sk) -{ - return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static void set_rx_flow_off(struct caifsock *cf_sk) -{ - clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static void set_rx_flow_on(struct caifsock *cf_sk) -{ - set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static void set_tx_flow_off(struct caifsock *cf_sk) -{ - clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static void set_tx_flow_on(struct caifsock *cf_sk) -{ - set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); -} - -static void caif_read_lock(struct sock *sk) -{ - struct caifsock *cf_sk; - cf_sk = container_of(sk, struct caifsock, sk); - mutex_lock(&cf_sk->readlock); -} - -static void caif_read_unlock(struct sock *sk) -{ - struct caifsock *cf_sk; - cf_sk = container_of(sk, struct caifsock, sk); - mutex_unlock(&cf_sk->readlock); -} - -static int sk_rcvbuf_lowwater(struct caifsock *cf_sk) -{ - /* A quarter of full buffer is used a low water mark */ - return cf_sk->sk.sk_rcvbuf / 4; -} - -static void caif_flow_ctrl(struct sock *sk, int mode) -{ - struct caifsock *cf_sk; - cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd) - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); -} - -/* - * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are - * not dropped, but CAIF is sending flow off instead. 
- */ -static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - int err; - unsigned long flags; - struct sk_buff_head *list = &sk->sk_receive_queue; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - bool queued = false; - - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { - net_dbg_ratelimited("sending flow OFF (queue len = %d %d)\n", - atomic_read(&cf_sk->sk.sk_rmem_alloc), - sk_rcvbuf_lowwater(cf_sk)); - set_rx_flow_off(cf_sk); - caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); - } - - err = sk_filter(sk, skb); - if (err) - goto out; - - if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { - set_rx_flow_off(cf_sk); - net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); - caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); - } - skb->dev = NULL; - skb_set_owner_r(skb, sk); - spin_lock_irqsave(&list->lock, flags); - queued = !sock_flag(sk, SOCK_DEAD); - if (queued) - __skb_queue_tail(list, skb); - spin_unlock_irqrestore(&list->lock, flags); -out: - if (queued) - sk->sk_data_ready(sk); - else - kfree_skb(skb); -} - -/* Packet Receive Callback function called from CAIF Stack */ -static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) -{ - struct caifsock *cf_sk; - struct sk_buff *skb; - - cf_sk = container_of(layr, struct caifsock, layer); - skb = cfpkt_tonative(pkt); - - if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) { - kfree_skb(skb); - return 0; - } - caif_queue_rcv_skb(&cf_sk->sk, skb); - return 0; -} - -static void cfsk_hold(struct cflayer *layr) -{ - struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); - sock_hold(&cf_sk->sk); -} - -static void cfsk_put(struct cflayer *layr) -{ - struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); - sock_put(&cf_sk->sk); -} - -/* Packet Control Callback function called from CAIF */ -static void caif_ctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - 
int phyid) -{ - struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); - switch (flow) { - case CAIF_CTRLCMD_FLOW_ON_IND: - /* OK from modem to start sending again */ - set_tx_flow_on(cf_sk); - cf_sk->sk.sk_state_change(&cf_sk->sk); - break; - - case CAIF_CTRLCMD_FLOW_OFF_IND: - /* Modem asks us to shut up */ - set_tx_flow_off(cf_sk); - cf_sk->sk.sk_state_change(&cf_sk->sk); - break; - - case CAIF_CTRLCMD_INIT_RSP: - /* We're now connected */ - caif_client_register_refcnt(&cf_sk->layer, - cfsk_hold, cfsk_put); - cf_sk->sk.sk_state = CAIF_CONNECTED; - set_tx_flow_on(cf_sk); - cf_sk->sk.sk_shutdown = 0; - cf_sk->sk.sk_state_change(&cf_sk->sk); - break; - - case CAIF_CTRLCMD_DEINIT_RSP: - /* We're now disconnected */ - cf_sk->sk.sk_state = CAIF_DISCONNECTED; - cf_sk->sk.sk_state_change(&cf_sk->sk); - break; - - case CAIF_CTRLCMD_INIT_FAIL_RSP: - /* Connect request failed */ - cf_sk->sk.sk_err = ECONNREFUSED; - cf_sk->sk.sk_state = CAIF_DISCONNECTED; - cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; - /* - * Socket "standards" seems to require POLLOUT to - * be set at connect failure. - */ - set_tx_flow_on(cf_sk); - cf_sk->sk.sk_state_change(&cf_sk->sk); - break; - - case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: - /* Modem has closed this connection, or device is down. */ - cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; - cf_sk->sk.sk_err = ECONNRESET; - set_rx_flow_on(cf_sk); - sk_error_report(&cf_sk->sk); - break; - - default: - pr_debug("Unexpected flow command %d\n", flow); - } -} - -static void caif_check_flow_release(struct sock *sk) -{ - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - - if (rx_flow_is_on(cf_sk)) - return; - - if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { - set_rx_flow_on(cf_sk); - caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); - } -} - -/* - * Copied from unix_dgram_recvmsg, but removed credit checks, - * changed locking, address handling and added MSG_TRUNC. 
- */ -static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, - size_t len, int flags) - -{ - struct sock *sk = sock->sk; - struct sk_buff *skb; - int ret; - int copylen; - - ret = -EOPNOTSUPP; - if (flags & MSG_OOB) - goto read_error; - - skb = skb_recv_datagram(sk, flags, &ret); - if (!skb) - goto read_error; - copylen = skb->len; - if (len < copylen) { - m->msg_flags |= MSG_TRUNC; - copylen = len; - } - - ret = skb_copy_datagram_msg(skb, 0, m, copylen); - if (ret) - goto out_free; - - ret = (flags & MSG_TRUNC) ? skb->len : copylen; -out_free: - skb_free_datagram(sk, skb); - caif_check_flow_release(sk); - return ret; - -read_error: - return ret; -} - - -/* Copied from unix_stream_wait_data, identical except for lock call. */ -static long caif_stream_data_wait(struct sock *sk, long timeo) -{ - DEFINE_WAIT(wait); - lock_sock(sk); - - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - if (!skb_queue_empty(&sk->sk_receive_queue) || - sk->sk_err || - sk->sk_state != CAIF_CONNECTED || - sock_flag(sk, SOCK_DEAD) || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current) || - !timeo) - break; - - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - if (sock_flag(sk, SOCK_DEAD)) - break; - - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - } - - finish_wait(sk_sleep(sk), &wait); - release_sock(sk); - return timeo; -} - - -/* - * Copied from unix_stream_recvmsg, but removed credit checks, - * changed locking calls, changed address handling. 
- */ -static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct sock *sk = sock->sk; - int copied = 0; - int target; - int err = 0; - long timeo; - - err = -EOPNOTSUPP; - if (flags&MSG_OOB) - goto out; - - /* - * Lock the socket to prevent queue disordering - * while sleeps in memcpy_tomsg - */ - err = -EAGAIN; - if (sk->sk_state == CAIF_CONNECTING) - goto out; - - caif_read_lock(sk); - target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - - do { - int chunk; - struct sk_buff *skb; - - lock_sock(sk); - if (sock_flag(sk, SOCK_DEAD)) { - err = -ECONNRESET; - goto unlock; - } - skb = skb_dequeue(&sk->sk_receive_queue); - caif_check_flow_release(sk); - - if (skb == NULL) { - if (copied >= target) - goto unlock; - /* - * POSIX 1003.1g mandates this order. - */ - err = sock_error(sk); - if (err) - goto unlock; - err = -ECONNRESET; - if (sk->sk_shutdown & RCV_SHUTDOWN) - goto unlock; - - err = -EPIPE; - if (sk->sk_state != CAIF_CONNECTED) - goto unlock; - if (sock_flag(sk, SOCK_DEAD)) - goto unlock; - - release_sock(sk); - - err = -EAGAIN; - if (!timeo) - break; - - caif_read_unlock(sk); - - timeo = caif_stream_data_wait(sk, timeo); - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - goto out; - } - caif_read_lock(sk); - continue; -unlock: - release_sock(sk); - break; - } - release_sock(sk); - chunk = min_t(unsigned int, skb->len, size); - if (memcpy_to_msg(msg, skb->data, chunk)) { - skb_queue_head(&sk->sk_receive_queue, skb); - if (copied == 0) - copied = -EFAULT; - break; - } - copied += chunk; - size -= chunk; - - /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) { - skb_pull(skb, chunk); - - /* put the skb back if we didn't use it up. */ - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); - break; - } - kfree_skb(skb); - - } else { - /* - * It is questionable, see note in unix_dgram_recvmsg. 
- */ - /* put message back and return */ - skb_queue_head(&sk->sk_receive_queue, skb); - break; - } - } while (size); - caif_read_unlock(sk); - -out: - return copied ? : err; -} - -/* - * Copied from sock.c:sock_wait_for_wmem, but change to wait for - * CAIF flow-on and sock_writable. - */ -static long caif_wait_for_flow_on(struct caifsock *cf_sk, - int wait_writeable, long timeo, int *err) -{ - struct sock *sk = &cf_sk->sk; - DEFINE_WAIT(wait); - for (;;) { - *err = 0; - if (tx_flow_is_on(cf_sk) && - (!wait_writeable || sock_writeable(&cf_sk->sk))) - break; - *err = -ETIMEDOUT; - if (!timeo) - break; - *err = -ERESTARTSYS; - if (signal_pending(current)) - break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - *err = -ECONNRESET; - if (sk->sk_shutdown & SHUTDOWN_MASK) - break; - *err = -sk->sk_err; - if (sk->sk_err) - break; - *err = -EPIPE; - if (cf_sk->sk.sk_state != CAIF_CONNECTED) - break; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(sk), &wait); - return timeo; -} - -/* - * Transmit a SKB. The device may temporarily request re-transmission - * by returning EAGAIN. 
- */ -static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, - int noblock, long timeo) -{ - struct cfpkt *pkt; - - pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); - memset(skb->cb, 0, sizeof(struct caif_payload_info)); - cfpkt_set_prio(pkt, cf_sk->sk.sk_priority); - - if (cf_sk->layer.dn == NULL) { - kfree_skb(skb); - return -EINVAL; - } - - return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); -} - -/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ -static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, - size_t len) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - int buffer_size; - int ret = 0; - struct sk_buff *skb = NULL; - int noblock; - long timeo; - caif_assert(cf_sk); - ret = sock_error(sk); - if (ret) - goto err; - - ret = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) - goto err; - - ret = -EOPNOTSUPP; - if (msg->msg_namelen) - goto err; - - noblock = msg->msg_flags & MSG_DONTWAIT; - - timeo = sock_sndtimeo(sk, noblock); - timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), - 1, timeo, &ret); - - if (ret) - goto err; - ret = -EPIPE; - if (cf_sk->sk.sk_state != CAIF_CONNECTED || - sock_flag(sk, SOCK_DEAD) || - (sk->sk_shutdown & RCV_SHUTDOWN)) - goto err; - - /* Error if trying to write more than maximum frame size. 
*/ - ret = -EMSGSIZE; - if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM) - goto err; - - buffer_size = len + cf_sk->headroom + cf_sk->tailroom; - - ret = -ENOMEM; - skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); - - if (!skb || skb_tailroom(skb) < buffer_size) - goto err; - - skb_reserve(skb, cf_sk->headroom); - - ret = memcpy_from_msg(skb_put(skb, len), msg, len); - - if (ret) - goto err; - ret = transmit_skb(skb, cf_sk, noblock, timeo); - if (ret < 0) - /* skb is already freed */ - return ret; - - return len; -err: - kfree_skb(skb); - return ret; -} - -/* - * Copied from unix_stream_sendmsg and adapted to CAIF: - * Changed removed permission handling and added waiting for flow on - * and other minor adaptations. - */ -static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, - size_t len) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - int err, size; - struct sk_buff *skb; - int sent = 0; - long timeo; - - err = -EOPNOTSUPP; - if (unlikely(msg->msg_flags&MSG_OOB)) - goto out_err; - - if (unlikely(msg->msg_namelen)) - goto out_err; - - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err); - - if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - goto pipe_err; - - while (sent < len) { - - size = len-sent; - - if (size > cf_sk->maxframe) - size = cf_sk->maxframe; - - /* If size is more than half of sndbuf, chop up message */ - if (size > ((sk->sk_sndbuf >> 1) - 64)) - size = (sk->sk_sndbuf >> 1) - 64; - - if (size > SKB_MAX_ALLOC) - size = SKB_MAX_ALLOC; - - skb = sock_alloc_send_skb(sk, - size + cf_sk->headroom + - cf_sk->tailroom, - msg->msg_flags&MSG_DONTWAIT, - &err); - if (skb == NULL) - goto out_err; - - skb_reserve(skb, cf_sk->headroom); - /* - * If you pass two values to the sock_alloc_send_skb - * it tries to grab the large buffer with GFP_NOFS - * (which can fail easily), and if it fails grab the - * 
fallback size buffer which is under a page and will - * succeed. [Alan] - */ - size = min_t(int, size, skb_tailroom(skb)); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); - goto out_err; - } - err = transmit_skb(skb, cf_sk, - msg->msg_flags&MSG_DONTWAIT, timeo); - if (err < 0) - /* skb is already freed */ - goto pipe_err; - - sent += size; - } - - return sent; - -pipe_err: - if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - err = -EPIPE; -out_err: - return sent ? : err; -} - -static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, - unsigned int ol) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - int linksel; - - if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) - return -ENOPROTOOPT; - - switch (opt) { - case CAIFSO_LINK_SELECT: - if (ol < sizeof(int)) - return -EINVAL; - if (lvl != SOL_CAIF) - goto bad_sol; - if (copy_from_sockptr(&linksel, ov, sizeof(int))) - return -EINVAL; - lock_sock(&(cf_sk->sk)); - cf_sk->conn_req.link_selector = linksel; - release_sock(&cf_sk->sk); - return 0; - - case CAIFSO_REQ_PARAM: - if (lvl != SOL_CAIF) - goto bad_sol; - if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) - return -ENOPROTOOPT; - lock_sock(&(cf_sk->sk)); - if (ol > sizeof(cf_sk->conn_req.param.data) || - copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) { - release_sock(&cf_sk->sk); - return -EINVAL; - } - cf_sk->conn_req.param.size = ol; - release_sock(&cf_sk->sk); - return 0; - - default: - return -ENOPROTOOPT; - } - - return 0; -bad_sol: - return -ENOPROTOOPT; - -} - -/* - * caif_connect() - Connect a CAIF Socket - * Copied and modified af_irda.c:irda_connect(). - * - * Note : by consulting "errno", the user space caller may learn the cause - * of the failure. Most of them are visible in the function, others may come - * from subroutines called and are listed here : - * o -EAFNOSUPPORT: bad socket family or type. 
- * o -ESOCKTNOSUPPORT: bad socket type or protocol - * o -EINVAL: bad socket address, or CAIF link type - * o -ECONNREFUSED: remote end refused the connection. - * o -EINPROGRESS: connect request sent but timed out (or non-blocking) - * o -EISCONN: already connected. - * o -ETIMEDOUT: Connection timed out (send timeout) - * o -ENODEV: No link layer to send request - * o -ECONNRESET: Received Shutdown indication or lost link layer - * o -ENOMEM: Out of memory - * - * State Strategy: - * o sk_state: holds the CAIF_* protocol state, it's updated by - * caif_ctrl_cb. - * o sock->state: holds the SS_* socket state and is updated by connect and - * disconnect. - */ -static int caif_connect(struct socket *sock, struct sockaddr_unsized *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - long timeo; - int err; - int ifindex, headroom, tailroom; - unsigned int mtu; - struct net_device *dev; - - lock_sock(sk); - - err = -EINVAL; - if (addr_len < offsetofend(struct sockaddr, sa_family)) - goto out; - - err = -EAFNOSUPPORT; - if (uaddr->sa_family != AF_CAIF) - goto out; - - switch (sock->state) { - case SS_UNCONNECTED: - /* Normal case, a fresh connect */ - caif_assert(sk->sk_state == CAIF_DISCONNECTED); - break; - case SS_CONNECTING: - switch (sk->sk_state) { - case CAIF_CONNECTED: - sock->state = SS_CONNECTED; - err = -EISCONN; - goto out; - case CAIF_DISCONNECTED: - /* Reconnect allowed */ - break; - case CAIF_CONNECTING: - err = -EALREADY; - if (flags & O_NONBLOCK) - goto out; - goto wait_connect; - } - break; - case SS_CONNECTED: - caif_assert(sk->sk_state == CAIF_CONNECTED || - sk->sk_state == CAIF_DISCONNECTED); - if (sk->sk_shutdown & SHUTDOWN_MASK) { - /* Allow re-connect after SHUTDOWN_IND */ - caif_disconnect_client(sock_net(sk), &cf_sk->layer); - caif_free_client(&cf_sk->layer); - break; - } - /* No reconnect on a seqpacket socket */ - err = -EISCONN; - goto out; - case 
SS_DISCONNECTING: - case SS_FREE: - caif_assert(1); /*Should never happen */ - break; - } - sk->sk_state = CAIF_DISCONNECTED; - sock->state = SS_UNCONNECTED; - sk_stream_kill_queues(&cf_sk->sk); - - err = -EINVAL; - if (addr_len != sizeof(struct sockaddr_caif)) - goto out; - - memcpy(&cf_sk->conn_req.sockaddr, uaddr, - sizeof(struct sockaddr_caif)); - - /* Move to connecting socket, start sending Connect Requests */ - sock->state = SS_CONNECTING; - sk->sk_state = CAIF_CONNECTING; - - /* Check priority value comming from socket */ - /* if priority value is out of range it will be ajusted */ - if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX) - cf_sk->conn_req.priority = CAIF_PRIO_MAX; - else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN) - cf_sk->conn_req.priority = CAIF_PRIO_MIN; - else - cf_sk->conn_req.priority = cf_sk->sk.sk_priority; - - /*ifindex = id of the interface.*/ - cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if; - - cf_sk->layer.receive = caif_sktrecv_cb; - - err = caif_connect_client(sock_net(sk), &cf_sk->conn_req, - &cf_sk->layer, &ifindex, &headroom, &tailroom); - - if (err < 0) { - cf_sk->sk.sk_socket->state = SS_UNCONNECTED; - cf_sk->sk.sk_state = CAIF_DISCONNECTED; - goto out; - } - - err = -ENODEV; - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), ifindex); - if (!dev) { - rcu_read_unlock(); - goto out; - } - cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom); - mtu = dev->mtu; - rcu_read_unlock(); - - cf_sk->tailroom = tailroom; - cf_sk->maxframe = mtu - (headroom + tailroom); - if (cf_sk->maxframe < 1) { - pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu); - err = -ENODEV; - goto out; - } - - err = -EINPROGRESS; -wait_connect: - - if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK)) - goto out; - - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - - release_sock(sk); - err = -ERESTARTSYS; - timeo = wait_event_interruptible_timeout(*sk_sleep(sk), - sk->sk_state != CAIF_CONNECTING, - timeo); - lock_sock(sk); - if (timeo 
< 0) - goto out; /* -ERESTARTSYS */ - - err = -ETIMEDOUT; - if (timeo == 0 && sk->sk_state != CAIF_CONNECTED) - goto out; - if (sk->sk_state != CAIF_CONNECTED) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); - if (!err) - err = -ECONNREFUSED; - goto out; - } - sock->state = SS_CONNECTED; - err = 0; -out: - release_sock(sk); - return err; -} - -/* - * caif_release() - Disconnect a CAIF Socket - * Copied and modified af_irda.c:irda_release(). - */ -static int caif_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - - if (!sk) - return 0; - - set_tx_flow_off(cf_sk); - - /* - * Ensure that packets are not queued after this point in time. - * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock, - * this ensures no packets when sock is dead. - */ - spin_lock_bh(&sk->sk_receive_queue.lock); - sock_set_flag(sk, SOCK_DEAD); - spin_unlock_bh(&sk->sk_receive_queue.lock); - sock->sk = NULL; - - WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir)); - debugfs_remove_recursive(cf_sk->debugfs_socket_dir); - - lock_sock(&(cf_sk->sk)); - sk->sk_state = CAIF_DISCONNECTED; - sk->sk_shutdown = SHUTDOWN_MASK; - - caif_disconnect_client(sock_net(sk), &cf_sk->layer); - cf_sk->sk.sk_socket->state = SS_DISCONNECTING; - wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP); - - sock_orphan(sk); - sk_stream_kill_queues(&cf_sk->sk); - release_sock(sk); - sock_put(sk); - return 0; -} - -/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ -static __poll_t caif_poll(struct file *file, - struct socket *sock, poll_table *wait) -{ - struct sock *sk = sock->sk; - __poll_t mask; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - - sock_poll_wait(file, sock, wait); - mask = 0; - - /* exceptional events? */ - if (sk->sk_err) - mask |= EPOLLERR; - if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= EPOLLRDHUP; - - /* readable? 
*/ - if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) - mask |= EPOLLIN | EPOLLRDNORM; - - /* - * we set writable also when the other side has shut down the - * connection. This prevents stuck sockets. - */ - if (sock_writeable(sk) && tx_flow_is_on(cf_sk)) - mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; - - return mask; -} - -static const struct proto_ops caif_seqpacket_ops = { - .family = PF_CAIF, - .owner = THIS_MODULE, - .release = caif_release, - .bind = sock_no_bind, - .connect = caif_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = caif_poll, - .ioctl = sock_no_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = setsockopt, - .sendmsg = caif_seqpkt_sendmsg, - .recvmsg = caif_seqpkt_recvmsg, - .mmap = sock_no_mmap, -}; - -static const struct proto_ops caif_stream_ops = { - .family = PF_CAIF, - .owner = THIS_MODULE, - .release = caif_release, - .bind = sock_no_bind, - .connect = caif_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = caif_poll, - .ioctl = sock_no_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = setsockopt, - .sendmsg = caif_stream_sendmsg, - .recvmsg = caif_stream_recvmsg, - .mmap = sock_no_mmap, -}; - -/* This function is called when a socket is finally destroyed. 
*/ -static void caif_sock_destructor(struct sock *sk) -{ - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - caif_assert(!refcount_read(&sk->sk_wmem_alloc)); - caif_assert(sk_unhashed(sk)); - caif_assert(!sk->sk_socket); - if (!sock_flag(sk, SOCK_DEAD)) { - pr_debug("Attempt to release alive CAIF socket: %p\n", sk); - return; - } - sk_stream_kill_queues(&cf_sk->sk); - WARN_ON_ONCE(sk->sk_forward_alloc); - caif_free_client(&cf_sk->layer); -} - -static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk = NULL; - struct caifsock *cf_sk = NULL; - static struct proto prot = {.name = "PF_CAIF", - .owner = THIS_MODULE, - .obj_size = sizeof(struct caifsock), - .useroffset = offsetof(struct caifsock, conn_req.param), - .usersize = sizeof_field(struct caifsock, conn_req.param) - }; - - if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) - return -EPERM; - /* - * The sock->type specifies the socket type to use. - * The CAIF socket is a packet stream in the sense - * that it is packet based. CAIF trusts the reliability - * of the link, no resending is implemented. - */ - if (sock->type == SOCK_SEQPACKET) - sock->ops = &caif_seqpacket_ops; - else if (sock->type == SOCK_STREAM) - sock->ops = &caif_stream_ops; - else - return -ESOCKTNOSUPPORT; - - if (protocol < 0 || protocol >= CAIFPROTO_MAX) - return -EPROTONOSUPPORT; - /* - * Set the socket state to unconnected. The socket state - * is really not used at all in the net/core or socket.c but the - * initialization makes sure that sock->state is not uninitialized. 
- */ - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); - if (!sk) - return -ENOMEM; - - cf_sk = container_of(sk, struct caifsock, sk); - - /* Store the protocol */ - sk->sk_protocol = (unsigned char) protocol; - - /* Initialize default priority for well-known cases */ - switch (protocol) { - case CAIFPROTO_AT: - sk->sk_priority = TC_PRIO_CONTROL; - break; - case CAIFPROTO_RFM: - sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; - break; - default: - sk->sk_priority = TC_PRIO_BESTEFFORT; - } - - /* - * Lock in order to try to stop someone from opening the socket - * too early. - */ - lock_sock(&(cf_sk->sk)); - - /* Initialize the nozero default sock structure data. */ - sock_init_data(sock, sk); - sk->sk_destruct = caif_sock_destructor; - - mutex_init(&cf_sk->readlock); /* single task reading lock */ - cf_sk->layer.ctrlcmd = caif_ctrl_cb; - cf_sk->sk.sk_socket->state = SS_UNCONNECTED; - cf_sk->sk.sk_state = CAIF_DISCONNECTED; - - set_tx_flow_off(cf_sk); - set_rx_flow_on(cf_sk); - - /* Set default options on configuration */ - cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; - cf_sk->conn_req.protocol = protocol; - release_sock(&cf_sk->sk); - return 0; -} - - -static const struct net_proto_family caif_family_ops = { - .family = PF_CAIF, - .create = caif_create, - .owner = THIS_MODULE, -}; - -static int __init caif_sktinit_module(void) -{ - return sock_register(&caif_family_ops); -} - -static void __exit caif_sktexit_module(void) -{ - sock_unregister(PF_CAIF); -} -module_init(caif_sktinit_module); -module_exit(caif_sktexit_module); diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c deleted file mode 100644 index 4d44960d4c2f..000000000000 --- a/net/caif/caif_usb.c +++ /dev/null @@ -1,216 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * CAIF USB handler - * Copyright (C) ST-Ericsson AB 2011 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/module.h> -#include <linux/netdevice.h> 
-#include <linux/slab.h> -#include <linux/mii.h> -#include <linux/usb.h> -#include <linux/usb/usbnet.h> -#include <linux/etherdevice.h> -#include <net/netns/generic.h> -#include <net/caif/caif_dev.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfcnfg.h> - -MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol USB support"); -MODULE_LICENSE("GPL"); - -#define CFUSB_PAD_DESCR_SZ 1 /* Alignment descriptor length */ -#define CFUSB_ALIGNMENT 4 /* Number of bytes to align. */ -#define CFUSB_MAX_HEADLEN (CFUSB_PAD_DESCR_SZ + CFUSB_ALIGNMENT-1) -#define STE_USB_VID 0x04cc /* USB Product ID for ST-Ericsson */ -#define STE_USB_PID_CAIF 0x230f /* Product id for CAIF Modems */ - -struct cfusbl { - struct cflayer layer; - u8 tx_eth_hdr[ETH_HLEN]; -}; - -static bool pack_added; - -static int cfusbl_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 hpad; - - /* Remove padding. */ - cfpkt_extr_head(pkt, &hpad, 1); - cfpkt_extr_head(pkt, NULL, hpad); - return layr->up->receive(layr->up, pkt); -} - -static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - struct caif_payload_info *info; - u8 hpad; - u8 zeros[CFUSB_ALIGNMENT]; - struct sk_buff *skb; - struct cfusbl *usbl = container_of(layr, struct cfusbl, layer); - - skb = cfpkt_tonative(pkt); - - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - - info = cfpkt_info(pkt); - hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1); - - if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) { - pr_warn("Headroom too small\n"); - kfree_skb(skb); - return -EIO; - } - memset(zeros, 0, hpad); - - cfpkt_add_head(pkt, zeros, hpad); - cfpkt_add_head(pkt, &hpad, 1); - cfpkt_add_head(pkt, usbl->tx_eth_hdr, sizeof(usbl->tx_eth_hdr)); - return layr->dn->transmit(layr->dn, pkt); -} - -static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - if (layr->up && layr->up->ctrlcmd) - layr->up->ctrlcmd(layr->up, ctrl, 
layr->id); -} - -static struct cflayer *cfusbl_create(int phyid, const u8 ethaddr[ETH_ALEN], - u8 braddr[ETH_ALEN]) -{ - struct cfusbl *this = kmalloc_obj(struct cfusbl, GFP_ATOMIC); - - if (!this) - return NULL; - - caif_assert(offsetof(struct cfusbl, layer) == 0); - - memset(&this->layer, 0, sizeof(this->layer)); - this->layer.receive = cfusbl_receive; - this->layer.transmit = cfusbl_transmit; - this->layer.ctrlcmd = cfusbl_ctrlcmd; - snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "usb%d", phyid); - this->layer.id = phyid; - - /* - * Construct TX ethernet header: - * 0-5 destination address - * 5-11 source address - * 12-13 protocol type - */ - ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], braddr); - ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], ethaddr); - this->tx_eth_hdr[12] = cpu_to_be16(ETH_P_802_EX1) & 0xff; - this->tx_eth_hdr[13] = (cpu_to_be16(ETH_P_802_EX1) >> 8) & 0xff; - pr_debug("caif ethernet TX-header dst:%pM src:%pM type:%02x%02x\n", - this->tx_eth_hdr, this->tx_eth_hdr + ETH_ALEN, - this->tx_eth_hdr[12], this->tx_eth_hdr[13]); - - return (struct cflayer *) this; -} - -static void cfusbl_release(struct cflayer *layer) -{ - kfree(layer); -} - -static struct packet_type caif_usb_type __read_mostly = { - .type = cpu_to_be16(ETH_P_802_EX1), -}; - -static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct caif_dev_common common; - struct cflayer *layer, *link_support; - struct usbnet *usbnet; - struct usb_device *usbdev; - int res; - - if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) - return 0; - - /* Check whether we have a NCM device, and find its VID/PID. 
*/ - if (!(dev->dev.parent && dev->dev.parent->driver && - strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) - return 0; - - usbnet = netdev_priv(dev); - usbdev = usbnet->udev; - - pr_debug("USB CDC NCM device VID:0x%4x PID:0x%4x\n", - le16_to_cpu(usbdev->descriptor.idVendor), - le16_to_cpu(usbdev->descriptor.idProduct)); - - /* Check for VID/PID that supports CAIF */ - if (!(le16_to_cpu(usbdev->descriptor.idVendor) == STE_USB_VID && - le16_to_cpu(usbdev->descriptor.idProduct) == STE_USB_PID_CAIF)) - return 0; - - if (what == NETDEV_UNREGISTER) - module_put(THIS_MODULE); - - if (what != NETDEV_REGISTER) - return 0; - - __module_get(THIS_MODULE); - - memset(&common, 0, sizeof(common)); - common.use_frag = false; - common.use_fcs = false; - common.use_stx = false; - common.link_select = CAIF_LINK_HIGH_BANDW; - common.flowctrl = NULL; - - link_support = cfusbl_create(dev->ifindex, dev->dev_addr, - dev->broadcast); - - if (!link_support) - return -ENOMEM; - - if (dev->num_tx_queues > 1) - pr_warn("USB device uses more than one tx queue\n"); - - res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, - &layer, &caif_usb_type.func); - if (res) - goto err; - - if (!pack_added) - dev_add_pack(&caif_usb_type); - pack_added = true; - - strscpy(layer->name, dev->name, sizeof(layer->name)); - - return 0; -err: - cfusbl_release(link_support); - return res; -} - -static struct notifier_block caif_device_notifier = { - .notifier_call = cfusbl_device_notify, - .priority = 0, -}; - -static int __init cfusbl_init(void) -{ - return register_netdevice_notifier(&caif_device_notifier); -} - -static void __exit cfusbl_exit(void) -{ - unregister_netdevice_notifier(&caif_device_notifier); - dev_remove_pack(&caif_usb_type); -} - -module_init(cfusbl_init); -module_exit(cfusbl_exit); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c deleted file mode 100644 index 8a80914783e8..000000000000 --- a/net/caif/cfcnfg.c +++ /dev/null @@ -1,612 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/stddef.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#include <linux/module.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfcnfg.h> -#include <net/caif/cfctrl.h> -#include <net/caif/cfmuxl.h> -#include <net/caif/cffrml.h> -#include <net/caif/cfserl.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/caif_dev.h> - -#define container_obj(layr) container_of(layr, struct cfcnfg, layer) - -/* Information about CAIF physical interfaces held by Config Module in order - * to manage physical interfaces - */ -struct cfcnfg_phyinfo { - struct list_head node; - bool up; - - /* Pointer to the layer below the MUX (framing layer) */ - struct cflayer *frm_layer; - /* Pointer to the lowest actual physical layer */ - struct cflayer *phy_layer; - /* Unique identifier of the physical interface */ - unsigned int id; - /* Preference of the physical in interface */ - enum cfcnfg_phy_preference pref; - - /* Information about the physical device */ - struct dev_info dev_info; - - /* Interface index */ - int ifindex; - - /* Protocol head room added for CAIF link layer */ - int head_room; - - /* Use Start of frame checksum */ - bool use_fcs; -}; - -struct cfcnfg { - struct cflayer layer; - struct cflayer *ctrl; - struct cflayer *mux; - struct list_head phys; - struct mutex lock; -}; - -static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, - enum cfctrl_srv serv, u8 phyid, - struct cflayer *adapt_layer); -static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); -static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer); -static void cfctrl_resp_func(void); -static void cfctrl_enum_resp(void); - -struct cfcnfg *cfcnfg_create(void) -{ - struct cfcnfg *this; - struct cfctrl_rsp 
*resp; - - might_sleep(); - - /* Initiate this layer */ - this = kzalloc_obj(struct cfcnfg, GFP_ATOMIC); - if (!this) - return NULL; - this->mux = cfmuxl_create(); - if (!this->mux) - goto out_of_mem; - this->ctrl = cfctrl_create(); - if (!this->ctrl) - goto out_of_mem; - /* Initiate response functions */ - resp = cfctrl_get_respfuncs(this->ctrl); - resp->enum_rsp = cfctrl_enum_resp; - resp->linkerror_ind = cfctrl_resp_func; - resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp; - resp->sleep_rsp = cfctrl_resp_func; - resp->wake_rsp = cfctrl_resp_func; - resp->restart_rsp = cfctrl_resp_func; - resp->radioset_rsp = cfctrl_resp_func; - resp->linksetup_rsp = cfcnfg_linkup_rsp; - resp->reject_rsp = cfcnfg_reject_rsp; - INIT_LIST_HEAD(&this->phys); - - cfmuxl_set_uplayer(this->mux, this->ctrl, 0); - layer_set_dn(this->ctrl, this->mux); - layer_set_up(this->ctrl, this); - mutex_init(&this->lock); - - return this; -out_of_mem: - synchronize_rcu(); - - kfree(this->mux); - kfree(this->ctrl); - kfree(this); - return NULL; -} - -void cfcnfg_remove(struct cfcnfg *cfg) -{ - might_sleep(); - if (cfg) { - synchronize_rcu(); - - kfree(cfg->mux); - cfctrl_remove(cfg->ctrl); - kfree(cfg); - } -} - -static void cfctrl_resp_func(void) -{ -} - -static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, - u8 phyid) -{ - struct cfcnfg_phyinfo *phy; - - list_for_each_entry_rcu(phy, &cnfg->phys, node) - if (phy->id == phyid) - return phy; - return NULL; -} - -static void cfctrl_enum_resp(void) -{ -} - -static struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg, - enum cfcnfg_phy_preference phy_pref) -{ - /* Try to match with specified preference */ - struct cfcnfg_phyinfo *phy; - - list_for_each_entry_rcu(phy, &cnfg->phys, node) { - if (phy->up && phy->pref == phy_pref && - phy->frm_layer != NULL) - - return &phy->dev_info; - } - - /* Otherwise just return something */ - list_for_each_entry_rcu(phy, &cnfg->phys, node) - if (phy->up) - return &phy->dev_info; - - return NULL; -} 
- -static int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi) -{ - struct cfcnfg_phyinfo *phy; - - list_for_each_entry_rcu(phy, &cnfg->phys, node) - if (phy->ifindex == ifi && phy->up) - return phy->id; - return -ENODEV; -} - -int caif_disconnect_client(struct net *net, struct cflayer *adap_layer) -{ - u8 channel_id; - struct cfcnfg *cfg = get_cfcnfg(net); - - caif_assert(adap_layer != NULL); - cfctrl_cancel_req(cfg->ctrl, adap_layer); - channel_id = adap_layer->id; - if (channel_id != 0) { - struct cflayer *servl; - servl = cfmuxl_remove_uplayer(cfg->mux, channel_id); - cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer); - if (servl != NULL) - layer_set_up(servl, NULL); - } else - pr_debug("nothing to disconnect\n"); - - /* Do RCU sync before initiating cleanup */ - synchronize_rcu(); - if (adap_layer->ctrlcmd != NULL) - adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0); - return 0; - -} -EXPORT_SYMBOL(caif_disconnect_client); - -static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) -{ -} - -static const int protohead[CFCTRL_SRV_MASK] = { - [CFCTRL_SRV_VEI] = 4, - [CFCTRL_SRV_DATAGRAM] = 7, - [CFCTRL_SRV_UTIL] = 4, - [CFCTRL_SRV_RFM] = 3, - [CFCTRL_SRV_DBG] = 3, -}; - - -static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, - struct caif_connect_request *s, - struct cfctrl_link_param *l) -{ - struct dev_info *dev_info; - enum cfcnfg_phy_preference pref; - int res; - - memset(l, 0, sizeof(*l)); - /* In caif protocol low value is high priority */ - l->priority = CAIF_PRIO_MAX - s->priority + 1; - - if (s->ifindex != 0) { - res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex); - if (res < 0) - return res; - l->phyid = res; - } else { - switch (s->link_selector) { - case CAIF_LINK_HIGH_BANDW: - pref = CFPHYPREF_HIGH_BW; - break; - case CAIF_LINK_LOW_LATENCY: - pref = CFPHYPREF_LOW_LAT; - break; - default: - return -EINVAL; - } - dev_info = cfcnfg_get_phyid(cnfg, pref); - if (dev_info == NULL) - return -ENODEV; - l->phyid = 
dev_info->id; - } - switch (s->protocol) { - case CAIFPROTO_AT: - l->linktype = CFCTRL_SRV_VEI; - l->endpoint = (s->sockaddr.u.at.type >> 2) & 0x3; - l->chtype = s->sockaddr.u.at.type & 0x3; - break; - case CAIFPROTO_DATAGRAM: - l->linktype = CFCTRL_SRV_DATAGRAM; - l->chtype = 0x00; - l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; - break; - case CAIFPROTO_DATAGRAM_LOOP: - l->linktype = CFCTRL_SRV_DATAGRAM; - l->chtype = 0x03; - l->endpoint = 0x00; - l->u.datagram.connid = s->sockaddr.u.dgm.connection_id; - break; - case CAIFPROTO_RFM: - l->linktype = CFCTRL_SRV_RFM; - l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; - strscpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, - sizeof(l->u.rfm.volume)); - break; - case CAIFPROTO_UTIL: - l->linktype = CFCTRL_SRV_UTIL; - l->endpoint = 0x00; - l->chtype = 0x00; - strscpy(l->u.utility.name, s->sockaddr.u.util.service, - sizeof(l->u.utility.name)); - caif_assert(sizeof(l->u.utility.name) > 10); - l->u.utility.paramlen = s->param.size; - if (l->u.utility.paramlen > sizeof(l->u.utility.params)) - l->u.utility.paramlen = sizeof(l->u.utility.params); - - memcpy(l->u.utility.params, s->param.data, - l->u.utility.paramlen); - - break; - case CAIFPROTO_DEBUG: - l->linktype = CFCTRL_SRV_DBG; - l->endpoint = s->sockaddr.u.dbg.service; - l->chtype = s->sockaddr.u.dbg.type; - break; - default: - return -EINVAL; - } - return 0; -} - -int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, - struct cflayer *adap_layer, int *ifindex, - int *proto_head, int *proto_tail) -{ - struct cflayer *frml; - struct cfcnfg_phyinfo *phy; - int err; - struct cfctrl_link_param param; - struct cfcnfg *cfg = get_cfcnfg(net); - - rcu_read_lock(); - err = caif_connect_req_to_link_param(cfg, conn_req, ¶m); - if (err) - goto unlock; - - phy = cfcnfg_get_phyinfo_rcu(cfg, param.phyid); - if (!phy) { - err = -ENODEV; - goto unlock; - } - err = -EINVAL; - - if (adap_layer == NULL) { - pr_err("adap_layer is zero\n"); - goto 
unlock; - } - if (adap_layer->receive == NULL) { - pr_err("adap_layer->receive is NULL\n"); - goto unlock; - } - if (adap_layer->ctrlcmd == NULL) { - pr_err("adap_layer->ctrlcmd == NULL\n"); - goto unlock; - } - - err = -ENODEV; - frml = phy->frm_layer; - if (frml == NULL) { - pr_err("Specified PHY type does not exist!\n"); - goto unlock; - } - caif_assert(param.phyid == phy->id); - caif_assert(phy->frm_layer->id == - param.phyid); - caif_assert(phy->phy_layer->id == - param.phyid); - - *ifindex = phy->ifindex; - *proto_tail = 2; - *proto_head = protohead[param.linktype] + phy->head_room; - - rcu_read_unlock(); - - /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ - cfctrl_enum_req(cfg->ctrl, param.phyid); - return cfctrl_linkup_request(cfg->ctrl, ¶m, adap_layer); - -unlock: - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(caif_connect_client); - -static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer) -{ - if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) - adapt_layer->ctrlcmd(adapt_layer, - CAIF_CTRLCMD_INIT_FAIL_RSP, 0); -} - -static void -cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, - u8 phyid, struct cflayer *adapt_layer) -{ - struct cfcnfg *cnfg = container_obj(layer); - struct cflayer *servicel = NULL; - struct cfcnfg_phyinfo *phyinfo; - struct net_device *netdev; - - if (channel_id == 0) { - pr_warn("received channel_id zero\n"); - if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) - adapt_layer->ctrlcmd(adapt_layer, - CAIF_CTRLCMD_INIT_FAIL_RSP, 0); - return; - } - - rcu_read_lock(); - - if (adapt_layer == NULL) { - pr_debug("link setup response but no client exist, send linkdown back\n"); - cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); - goto unlock; - } - - caif_assert(cnfg != NULL); - caif_assert(phyid != 0); - - phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); - if (phyinfo == NULL) { - pr_err("ERROR: Link Layer Device disappeared while 
connecting\n"); - goto unlock; - } - - caif_assert(phyinfo != NULL); - caif_assert(phyinfo->id == phyid); - caif_assert(phyinfo->phy_layer != NULL); - caif_assert(phyinfo->phy_layer->id == phyid); - - adapt_layer->id = channel_id; - - switch (serv) { - case CFCTRL_SRV_VEI: - servicel = cfvei_create(channel_id, &phyinfo->dev_info); - break; - case CFCTRL_SRV_DATAGRAM: - servicel = cfdgml_create(channel_id, - &phyinfo->dev_info); - break; - case CFCTRL_SRV_RFM: - netdev = phyinfo->dev_info.dev; - servicel = cfrfml_create(channel_id, &phyinfo->dev_info, - netdev->mtu); - break; - case CFCTRL_SRV_UTIL: - servicel = cfutill_create(channel_id, &phyinfo->dev_info); - break; - case CFCTRL_SRV_VIDEO: - servicel = cfvidl_create(channel_id, &phyinfo->dev_info); - break; - case CFCTRL_SRV_DBG: - servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); - break; - default: - pr_err("Protocol error. Link setup response - unknown channel type\n"); - goto unlock; - } - if (!servicel) - goto unlock; - layer_set_dn(servicel, cnfg->mux); - cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); - layer_set_up(servicel, adapt_layer); - layer_set_dn(adapt_layer, servicel); - - rcu_read_unlock(); - - servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); - return; -unlock: - rcu_read_unlock(); -} - -int -cfcnfg_add_phy_layer(struct cfcnfg *cnfg, - struct net_device *dev, struct cflayer *phy_layer, - enum cfcnfg_phy_preference pref, - struct cflayer *link_support, - bool fcs, int head_room) -{ - struct cflayer *frml; - struct cfcnfg_phyinfo *phyinfo = NULL; - int i, res = 0; - u8 phyid; - - mutex_lock(&cnfg->lock); - - /* CAIF protocol allow maximum 6 link-layers */ - for (i = 0; i < 7; i++) { - phyid = (dev->ifindex + i) & 0x7; - if (phyid == 0) - continue; - if (cfcnfg_get_phyinfo_rcu(cnfg, phyid) == NULL) - goto got_phyid; - } - pr_warn("Too many CAIF Link Layers (max 6)\n"); - res = -EEXIST; - goto out; - -got_phyid: - phyinfo = kzalloc_obj(struct cfcnfg_phyinfo, GFP_ATOMIC); - if 
(!phyinfo) { - res = -ENOMEM; - goto out; - } - - phy_layer->id = phyid; - phyinfo->pref = pref; - phyinfo->id = phyid; - phyinfo->dev_info.id = phyid; - phyinfo->dev_info.dev = dev; - phyinfo->phy_layer = phy_layer; - phyinfo->ifindex = dev->ifindex; - phyinfo->head_room = head_room; - phyinfo->use_fcs = fcs; - - frml = cffrml_create(phyid, fcs); - - if (!frml) { - res = -ENOMEM; - goto out_err; - } - phyinfo->frm_layer = frml; - layer_set_up(frml, cnfg->mux); - - if (link_support != NULL) { - link_support->id = phyid; - layer_set_dn(frml, link_support); - layer_set_up(link_support, frml); - layer_set_dn(link_support, phy_layer); - layer_set_up(phy_layer, link_support); - } else { - layer_set_dn(frml, phy_layer); - layer_set_up(phy_layer, frml); - } - - list_add_rcu(&phyinfo->node, &cnfg->phys); -out: - mutex_unlock(&cnfg->lock); - return res; - -out_err: - kfree(phyinfo); - mutex_unlock(&cnfg->lock); - return res; -} -EXPORT_SYMBOL(cfcnfg_add_phy_layer); - -int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, - bool up) -{ - struct cfcnfg_phyinfo *phyinfo; - - rcu_read_lock(); - phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phy_layer->id); - if (phyinfo == NULL) { - rcu_read_unlock(); - return -ENODEV; - } - - if (phyinfo->up == up) { - rcu_read_unlock(); - return 0; - } - phyinfo->up = up; - - if (up) { - cffrml_hold(phyinfo->frm_layer); - cfmuxl_set_dnlayer(cnfg->mux, phyinfo->frm_layer, - phy_layer->id); - } else { - cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id); - cffrml_put(phyinfo->frm_layer); - } - - rcu_read_unlock(); - return 0; -} -EXPORT_SYMBOL(cfcnfg_set_phy_state); - -int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer) -{ - struct cflayer *frml, *frml_dn; - u16 phyid; - struct cfcnfg_phyinfo *phyinfo; - - might_sleep(); - - mutex_lock(&cnfg->lock); - - phyid = phy_layer->id; - phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); - - if (phyinfo == NULL) { - mutex_unlock(&cnfg->lock); - return 0; - } - caif_assert(phyid 
== phyinfo->id); - caif_assert(phy_layer == phyinfo->phy_layer); - caif_assert(phy_layer->id == phyid); - caif_assert(phyinfo->frm_layer->id == phyid); - - list_del_rcu(&phyinfo->node); - synchronize_rcu(); - - /* Fail if reference count is not zero */ - if (cffrml_refcnt_read(phyinfo->frm_layer) != 0) { - pr_info("Wait for device inuse\n"); - list_add_rcu(&phyinfo->node, &cnfg->phys); - mutex_unlock(&cnfg->lock); - return -EAGAIN; - } - - frml = phyinfo->frm_layer; - frml_dn = frml->dn; - cffrml_set_uplayer(frml, NULL); - cffrml_set_dnlayer(frml, NULL); - if (phy_layer != frml_dn) { - layer_set_up(frml_dn, NULL); - layer_set_dn(frml_dn, NULL); - } - layer_set_up(phy_layer, NULL); - - if (phyinfo->phy_layer != frml_dn) - kfree(frml_dn); - - cffrml_free(frml); - kfree(phyinfo); - mutex_unlock(&cnfg->lock); - - return 0; -} -EXPORT_SYMBOL(cfcnfg_del_phy_layer); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c deleted file mode 100644 index c6cc2bfed65d..000000000000 --- a/net/caif/cfctrl.c +++ /dev/null @@ -1,631 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/pkt_sched.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfctrl.h> - -#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer) -#define UTILITY_NAME_LENGTH 16 -#define CFPKT_CTRL_PKT_LEN 20 - -#ifdef CAIF_NO_LOOP -static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt){ - return -1; -} -#else -static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt); -#endif -static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); -static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); - - -struct cflayer *cfctrl_create(void) -{ - struct dev_info dev_info; - 
struct cfctrl *this = - kzalloc_obj(struct cfctrl, GFP_ATOMIC); - if (!this) - return NULL; - caif_assert(offsetof(struct cfctrl, serv.layer) == 0); - memset(&dev_info, 0, sizeof(dev_info)); - dev_info.id = 0xff; - cfsrvl_init(&this->serv, 0, &dev_info, false); - atomic_set(&this->req_seq_no, 1); - atomic_set(&this->rsp_seq_no, 1); - this->serv.layer.receive = cfctrl_recv; - sprintf(this->serv.layer.name, "ctrl"); - this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; -#ifndef CAIF_NO_LOOP - spin_lock_init(&this->loop_linkid_lock); - this->loop_linkid = 1; -#endif - spin_lock_init(&this->info_list_lock); - INIT_LIST_HEAD(&this->list); - return &this->serv.layer; -} - -void cfctrl_remove(struct cflayer *layer) -{ - struct cfctrl_request_info *p, *tmp; - struct cfctrl *ctrl = container_obj(layer); - - spin_lock_bh(&ctrl->info_list_lock); - list_for_each_entry_safe(p, tmp, &ctrl->list, list) { - list_del(&p->list); - kfree(p); - } - spin_unlock_bh(&ctrl->info_list_lock); - kfree(layer); -} - -static bool param_eq(const struct cfctrl_link_param *p1, - const struct cfctrl_link_param *p2) -{ - bool eq = - p1->linktype == p2->linktype && - p1->priority == p2->priority && - p1->phyid == p2->phyid && - p1->endpoint == p2->endpoint && p1->chtype == p2->chtype; - - if (!eq) - return false; - - switch (p1->linktype) { - case CFCTRL_SRV_VEI: - return true; - case CFCTRL_SRV_DATAGRAM: - return p1->u.datagram.connid == p2->u.datagram.connid; - case CFCTRL_SRV_RFM: - return - p1->u.rfm.connid == p2->u.rfm.connid && - strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0; - case CFCTRL_SRV_UTIL: - return - p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb - && p1->u.utility.fifosize_bufs == - p2->u.utility.fifosize_bufs - && strcmp(p1->u.utility.name, p2->u.utility.name) == 0 - && p1->u.utility.paramlen == p2->u.utility.paramlen - && memcmp(p1->u.utility.params, p2->u.utility.params, - p1->u.utility.paramlen) == 0; - - case CFCTRL_SRV_VIDEO: - return p1->u.video.connid == p2->u.video.connid; 
- case CFCTRL_SRV_DBG: - return true; - case CFCTRL_SRV_DECM: - return false; - default: - return false; - } - return false; -} - -static bool cfctrl_req_eq(const struct cfctrl_request_info *r1, - const struct cfctrl_request_info *r2) -{ - if (r1->cmd != r2->cmd) - return false; - if (r1->cmd == CFCTRL_CMD_LINK_SETUP) - return param_eq(&r1->param, &r2->param); - else - return r1->channel_id == r2->channel_id; -} - -/* Insert request at the end */ -static void cfctrl_insert_req(struct cfctrl *ctrl, - struct cfctrl_request_info *req) -{ - spin_lock_bh(&ctrl->info_list_lock); - atomic_inc(&ctrl->req_seq_no); - req->sequence_no = atomic_read(&ctrl->req_seq_no); - list_add_tail(&req->list, &ctrl->list); - spin_unlock_bh(&ctrl->info_list_lock); -} - -/* Compare and remove request */ -static struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, - struct cfctrl_request_info *req) -{ - struct cfctrl_request_info *p, *tmp, *first; - - first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list); - - list_for_each_entry_safe(p, tmp, &ctrl->list, list) { - if (cfctrl_req_eq(req, p)) { - if (p != first) - pr_warn("Requests are not received in order\n"); - - atomic_set(&ctrl->rsp_seq_no, - p->sequence_no); - list_del(&p->list); - goto out; - } - } - p = NULL; -out: - return p; -} - -struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) -{ - struct cfctrl *this = container_obj(layer); - return &this->res; -} - -static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl) -{ - info->hdr_len = 0; - info->channel_id = cfctrl->serv.layer.id; - info->dev_info = &cfctrl->serv.dev_info; -} - -void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) -{ - struct cfpkt *pkt; - struct cfctrl *cfctrl = container_obj(layer); - struct cflayer *dn = cfctrl->serv.layer.dn; - - if (!dn) { - pr_debug("not able to send enum request\n"); - return; - } - pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) - return; - caif_assert(offsetof(struct 
cfctrl, serv.layer) == 0); - init_info(cfpkt_info(pkt), cfctrl); - cfpkt_info(pkt)->dev_info->id = physlinkid; - cfctrl->serv.dev_info.id = physlinkid; - cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM); - cfpkt_addbdy(pkt, physlinkid); - cfpkt_set_prio(pkt, TC_PRIO_CONTROL); - dn->transmit(dn, pkt); -} - -int cfctrl_linkup_request(struct cflayer *layer, - struct cfctrl_link_param *param, - struct cflayer *user_layer) -{ - struct cfctrl *cfctrl = container_obj(layer); - struct cflayer *dn = cfctrl->serv.layer.dn; - char utility_name[UTILITY_NAME_LENGTH]; - struct cfctrl_request_info *req; - struct cfpkt *pkt; - u32 tmp32; - u16 tmp16; - u8 tmp8; - int ret; - - if (!dn) { - pr_debug("not able to send linkup request\n"); - return -ENODEV; - } - - if (cfctrl_cancel_req(layer, user_layer) > 0) { - /* Slight Paranoia, check if already connecting */ - pr_err("Duplicate connect request for same client\n"); - WARN_ON(1); - return -EALREADY; - } - - pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) - return -ENOMEM; - cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); - cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); - cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); - cfpkt_addbdy(pkt, param->endpoint & 0x03); - - switch (param->linktype) { - case CFCTRL_SRV_VEI: - break; - case CFCTRL_SRV_VIDEO: - cfpkt_addbdy(pkt, (u8) param->u.video.connid); - break; - case CFCTRL_SRV_DBG: - break; - case CFCTRL_SRV_DATAGRAM: - tmp32 = cpu_to_le32(param->u.datagram.connid); - cfpkt_add_body(pkt, &tmp32, 4); - break; - case CFCTRL_SRV_RFM: - /* Construct a frame, convert DatagramConnectionID to network - * format long and copy it out... - */ - tmp32 = cpu_to_le32(param->u.rfm.connid); - cfpkt_add_body(pkt, &tmp32, 4); - /* Add volume name, including zero termination... 
*/ - cfpkt_add_body(pkt, param->u.rfm.volume, - strlen(param->u.rfm.volume) + 1); - break; - case CFCTRL_SRV_UTIL: - tmp16 = cpu_to_le16(param->u.utility.fifosize_kb); - cfpkt_add_body(pkt, &tmp16, 2); - tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); - cfpkt_add_body(pkt, &tmp16, 2); - strscpy_pad(utility_name, param->u.utility.name); - cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); - tmp8 = param->u.utility.paramlen; - cfpkt_add_body(pkt, &tmp8, 1); - cfpkt_add_body(pkt, param->u.utility.params, - param->u.utility.paramlen); - break; - default: - pr_warn("Request setup of bad link type = %d\n", - param->linktype); - cfpkt_destroy(pkt); - return -EINVAL; - } - req = kzalloc_obj(*req); - if (!req) { - cfpkt_destroy(pkt); - return -ENOMEM; - } - - req->client_layer = user_layer; - req->cmd = CFCTRL_CMD_LINK_SETUP; - req->param = *param; - cfctrl_insert_req(cfctrl, req); - init_info(cfpkt_info(pkt), cfctrl); - /* - * NOTE:Always send linkup and linkdown request on the same - * device as the payload. Otherwise old queued up payload - * might arrive with the newly allocated channel ID. 
- */ - cfpkt_info(pkt)->dev_info->id = param->phyid; - cfpkt_set_prio(pkt, TC_PRIO_CONTROL); - ret = - dn->transmit(dn, pkt); - if (ret < 0) { - int count; - - count = cfctrl_cancel_req(&cfctrl->serv.layer, - user_layer); - if (count != 1) { - pr_err("Could not remove request (%d)", count); - return -ENODEV; - } - } - return 0; -} - -int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, - struct cflayer *client) -{ - int ret; - struct cfpkt *pkt; - struct cfctrl *cfctrl = container_obj(layer); - struct cflayer *dn = cfctrl->serv.layer.dn; - - if (!dn) { - pr_debug("not able to send link-down request\n"); - return -ENODEV; - } - pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) - return -ENOMEM; - cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); - cfpkt_addbdy(pkt, channelid); - init_info(cfpkt_info(pkt), cfctrl); - cfpkt_set_prio(pkt, TC_PRIO_CONTROL); - ret = - dn->transmit(dn, pkt); -#ifndef CAIF_NO_LOOP - cfctrl->loop_linkused[channelid] = 0; -#endif - return ret; -} - -int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) -{ - struct cfctrl_request_info *p, *tmp; - struct cfctrl *ctrl = container_obj(layr); - int found = 0; - spin_lock_bh(&ctrl->info_list_lock); - - list_for_each_entry_safe(p, tmp, &ctrl->list, list) { - if (p->client_layer == adap_layer) { - list_del(&p->list); - kfree(p); - found++; - } - } - - spin_unlock_bh(&ctrl->info_list_lock); - return found; -} - -static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp) -{ - u8 len; - u8 linkid = 0; - enum cfctrl_srv serv; - enum cfctrl_srv servtype; - u8 endpoint; - u8 physlinkid; - u8 prio; - u8 tmp; - u8 *cp; - int i; - struct cfctrl_link_param linkparam; - struct cfctrl_request_info rsp, *req; - - memset(&linkparam, 0, sizeof(linkparam)); - - tmp = cfpkt_extr_head_u8(pkt); - - serv = tmp & CFCTRL_SRV_MASK; - linkparam.linktype = serv; - - servtype = tmp >> 4; - linkparam.chtype = servtype; - - tmp = cfpkt_extr_head_u8(pkt); - physlinkid = tmp & 0x07; - 
prio = tmp >> 3; - - linkparam.priority = prio; - linkparam.phyid = physlinkid; - endpoint = cfpkt_extr_head_u8(pkt); - linkparam.endpoint = endpoint & 0x03; - - switch (serv) { - case CFCTRL_SRV_VEI: - case CFCTRL_SRV_DBG: - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_VIDEO: - tmp = cfpkt_extr_head_u8(pkt); - linkparam.u.video.connid = tmp; - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - - case CFCTRL_SRV_DATAGRAM: - linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt); - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_RFM: - /* Construct a frame, convert - * DatagramConnectionID - * to network format long and copy it out... - */ - linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt); - cp = (u8 *) linkparam.u.rfm.volume; - for (tmp = cfpkt_extr_head_u8(pkt); - cfpkt_more(pkt) && tmp != '\0'; - tmp = cfpkt_extr_head_u8(pkt)) - *cp++ = tmp; - *cp = '\0'; - - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - - break; - case CFCTRL_SRV_UTIL: - /* Construct a frame, convert - * DatagramConnectionID - * to network format long and copy it out... 
- */ - /* Fifosize KB */ - linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt); - /* Fifosize bufs */ - linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt); - /* name */ - cp = (u8 *) linkparam.u.utility.name; - caif_assert(sizeof(linkparam.u.utility.name) - >= UTILITY_NAME_LENGTH); - for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - /* Length */ - len = cfpkt_extr_head_u8(pkt); - linkparam.u.utility.paramlen = len; - /* Param Data */ - cp = linkparam.u.utility.params; - while (cfpkt_more(pkt) && len--) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - /* Length */ - len = cfpkt_extr_head_u8(pkt); - /* Param Data */ - cfpkt_extr_head(pkt, NULL, len); - break; - default: - pr_warn("Request setup, invalid type (%d)\n", serv); - return -1; - } - - rsp.cmd = CFCTRL_CMD_LINK_SETUP; - rsp.param = linkparam; - spin_lock_bh(&cfctrl->info_list_lock); - req = cfctrl_remove_req(cfctrl, &rsp); - - if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || - cfpkt_erroneous(pkt)) { - pr_err("Invalid O/E bit or parse error " - "on CAIF control channel\n"); - cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0, - req ? req->client_layer : NULL); - } else { - cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid, - serv, physlinkid, - req ? 
req->client_layer : NULL); - } - - kfree(req); - - spin_unlock_bh(&cfctrl->info_list_lock); - - return 0; -} - -static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) -{ - u8 cmdrsp; - u8 cmd; - int ret = 0; - u8 linkid = 0; - struct cfctrl *cfctrl = container_obj(layer); - - cmdrsp = cfpkt_extr_head_u8(pkt); - cmd = cmdrsp & CFCTRL_CMD_MASK; - if (cmd != CFCTRL_CMD_LINK_ERR - && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp) - && CFCTRL_ERR_BIT != (CFCTRL_ERR_BIT & cmdrsp)) { - if (handle_loop(cfctrl, cmd, pkt) != 0) - cmdrsp |= CFCTRL_ERR_BIT; - } - - switch (cmd) { - case CFCTRL_CMD_LINK_SETUP: - ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp); - break; - case CFCTRL_CMD_LINK_DESTROY: - linkid = cfpkt_extr_head_u8(pkt); - cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); - break; - case CFCTRL_CMD_LINK_ERR: - pr_err("Frame Error Indication received\n"); - cfctrl->res.linkerror_ind(); - break; - case CFCTRL_CMD_ENUM: - cfctrl->res.enum_rsp(); - break; - case CFCTRL_CMD_SLEEP: - cfctrl->res.sleep_rsp(); - break; - case CFCTRL_CMD_WAKE: - cfctrl->res.wake_rsp(); - break; - case CFCTRL_CMD_LINK_RECONF: - cfctrl->res.restart_rsp(); - break; - case CFCTRL_CMD_RADIO_SET: - cfctrl->res.radioset_rsp(); - break; - default: - pr_err("Unrecognized Control Frame\n"); - ret = -1; - goto error; - } -error: - cfpkt_destroy(pkt); - return ret; -} - -static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - struct cfctrl *this = container_obj(layr); - switch (ctrl) { - case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: - case CAIF_CTRLCMD_FLOW_OFF_IND: - spin_lock_bh(&this->info_list_lock); - if (!list_empty(&this->list)) - pr_debug("Received flow off in control layer\n"); - spin_unlock_bh(&this->info_list_lock); - break; - case _CAIF_CTRLCMD_PHYIF_DOWN_IND: { - struct cfctrl_request_info *p, *tmp; - - /* Find all connect request and report failure */ - spin_lock_bh(&this->info_list_lock); - list_for_each_entry_safe(p, tmp, &this->list, list) { 
- if (p->param.phyid == phyid) { - list_del(&p->list); - p->client_layer->ctrlcmd(p->client_layer, - CAIF_CTRLCMD_INIT_FAIL_RSP, - phyid); - kfree(p); - } - } - spin_unlock_bh(&this->info_list_lock); - break; - } - default: - break; - } -} - -#ifndef CAIF_NO_LOOP -static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) -{ - static int last_linkid; - static int dec; - u8 linkid, linktype, tmp; - switch (cmd) { - case CFCTRL_CMD_LINK_SETUP: - spin_lock_bh(&ctrl->loop_linkid_lock); - if (!dec) { - for (linkid = last_linkid + 1; linkid < 254; linkid++) - if (!ctrl->loop_linkused[linkid]) - goto found; - } - dec = 1; - for (linkid = last_linkid - 1; linkid > 1; linkid--) - if (!ctrl->loop_linkused[linkid]) - goto found; - spin_unlock_bh(&ctrl->loop_linkid_lock); - return -1; -found: - if (linkid < 10) - dec = 0; - - if (!ctrl->loop_linkused[linkid]) - ctrl->loop_linkused[linkid] = 1; - - last_linkid = linkid; - - cfpkt_add_trail(pkt, &linkid, 1); - spin_unlock_bh(&ctrl->loop_linkid_lock); - cfpkt_peek_head(pkt, &linktype, 1); - if (linktype == CFCTRL_SRV_UTIL) { - tmp = 0x01; - cfpkt_add_trail(pkt, &tmp, 1); - cfpkt_add_trail(pkt, &tmp, 1); - } - break; - - case CFCTRL_CMD_LINK_DESTROY: - spin_lock_bh(&ctrl->loop_linkid_lock); - cfpkt_peek_head(pkt, &linkid, 1); - ctrl->loop_linkused[linkid] = 0; - spin_unlock_bh(&ctrl->loop_linkid_lock); - break; - default: - break; - } - return 0; -} -#endif diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c deleted file mode 100644 index 57ad3f82e004..000000000000 --- a/net/caif/cfdbgl.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/slab.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - -#define container_obj(layr) ((struct cfsrvl *) layr) - -static int 
cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) -{ - struct cfsrvl *dbg = kzalloc_obj(struct cfsrvl, GFP_ATOMIC); - if (!dbg) - return NULL; - caif_assert(offsetof(struct cfsrvl, layer) == 0); - cfsrvl_init(dbg, channel_id, dev_info, false); - dbg->layer.receive = cfdbgl_receive; - dbg->layer.transmit = cfdbgl_transmit; - snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ, "dbg%d", channel_id); - return &dbg->layer; -} - -static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - return layr->up->receive(layr->up, pkt); -} - -static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - struct cfsrvl *service = container_obj(layr); - struct caif_payload_info *info; - int ret; - - if (!cfsrvl_ready(service, &ret)) { - cfpkt_destroy(pkt); - return ret; - } - - /* Add info for MUX-layer to route the packet out */ - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - info->dev_info = &service->dev_info; - - return layr->dn->transmit(layr->dn, pkt); -} diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c deleted file mode 100644 index c451ddd155a7..000000000000 --- a/net/caif/cfdgml.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - - -#define container_obj(layr) ((struct cfsrvl *) layr) - -#define DGM_CMD_BIT 0x80 -#define DGM_FLOW_OFF 0x81 -#define DGM_FLOW_ON 0x80 -#define DGM_MTU 1500 - -static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cflayer 
*cfdgml_create(u8 channel_id, struct dev_info *dev_info) -{ - struct cfsrvl *dgm = kzalloc_obj(struct cfsrvl, GFP_ATOMIC); - if (!dgm) - return NULL; - caif_assert(offsetof(struct cfsrvl, layer) == 0); - cfsrvl_init(dgm, channel_id, dev_info, true); - dgm->layer.receive = cfdgml_receive; - dgm->layer.transmit = cfdgml_transmit; - snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ, "dgm%d", channel_id); - return &dgm->layer; -} - -static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 cmd = -1; - u8 dgmhdr[3]; - int ret; - caif_assert(layr->up != NULL); - caif_assert(layr->receive != NULL); - caif_assert(layr->ctrlcmd != NULL); - - if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - - if ((cmd & DGM_CMD_BIT) == 0) { - if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - ret = layr->up->receive(layr->up, pkt); - return ret; - } - - switch (cmd) { - case DGM_FLOW_OFF: /* FLOW OFF */ - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); - cfpkt_destroy(pkt); - return 0; - case DGM_FLOW_ON: /* FLOW ON */ - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); - cfpkt_destroy(pkt); - return 0; - default: - cfpkt_destroy(pkt); - pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd); - return -EPROTO; - } -} - -static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 packet_type; - u32 zero = 0; - struct caif_payload_info *info; - struct cfsrvl *service = container_obj(layr); - int ret; - - if (!cfsrvl_ready(service, &ret)) { - cfpkt_destroy(pkt); - return ret; - } - - /* STE Modem cannot handle more than 1500 bytes datagrams */ - if (cfpkt_getlen(pkt) > DGM_MTU) { - cfpkt_destroy(pkt); - return -EMSGSIZE; - } - - cfpkt_add_head(pkt, &zero, 3); - packet_type = 0x08; /* B9 set - UNCLASSIFIED */ - cfpkt_add_head(pkt, &packet_type, 1); - - /* Add info for MUX-layer to route the packet out. 
*/ - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - /* To optimize alignment, we add up the size of CAIF header - * before payload. - */ - info->hdr_len = 4; - info->dev_info = &service->dev_info; - return layr->dn->transmit(layr->dn, pkt); -} diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c deleted file mode 100644 index 0f4979d89fcb..000000000000 --- a/net/caif/cffrml.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * CAIF Framing Layer. - * - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/crc-ccitt.h> -#include <linux/netdevice.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cffrml.h> - -#define container_obj(layr) container_of(layr, struct cffrml, layer) - -struct cffrml { - struct cflayer layer; - bool dofcs; /* !< FCS active */ - int __percpu *pcpu_refcnt; -}; - -static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); -static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); - -static u32 cffrml_rcv_error; -static u32 cffrml_rcv_checsum_error; -struct cflayer *cffrml_create(u16 phyid, bool use_fcs) -{ - struct cffrml *this = kzalloc_obj(struct cffrml, GFP_ATOMIC); - if (!this) - return NULL; - this->pcpu_refcnt = alloc_percpu(int); - if (this->pcpu_refcnt == NULL) { - kfree(this); - return NULL; - } - - caif_assert(offsetof(struct cffrml, layer) == 0); - - this->layer.receive = cffrml_receive; - this->layer.transmit = cffrml_transmit; - this->layer.ctrlcmd = cffrml_ctrlcmd; - snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid); - this->dofcs = use_fcs; - this->layer.id = phyid; - return (struct cflayer *) this; -} - -void cffrml_free(struct cflayer *layer) -{ - struct 
cffrml *this = container_obj(layer); - free_percpu(this->pcpu_refcnt); - kfree(layer); -} - -void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up) -{ - this->up = up; -} - -void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn) -{ - this->dn = dn; -} - -static u16 cffrml_checksum(u16 chks, void *buf, u16 len) -{ - /* FIXME: FCS should be moved to glue in order to use OS-Specific - * solutions - */ - return crc_ccitt(chks, buf, len); -} - -static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u16 tmp; - u16 len; - u16 hdrchks; - int pktchks; - struct cffrml *this; - this = container_obj(layr); - - cfpkt_extr_head(pkt, &tmp, 2); - len = le16_to_cpu(tmp); - - /* Subtract for FCS on length if FCS is not used. */ - if (!this->dofcs) { - if (len < 2) { - ++cffrml_rcv_error; - pr_err("Invalid frame length (%d)\n", len); - cfpkt_destroy(pkt); - return -EPROTO; - } - len -= 2; - } - - if (cfpkt_setlen(pkt, len) < 0) { - ++cffrml_rcv_error; - pr_err("Framing length error (%d)\n", len); - cfpkt_destroy(pkt); - return -EPROTO; - } - /* - * Don't do extract if FCS is false, rather do setlen - then we don't - * get a cache-miss. 
- */ - if (this->dofcs) { - cfpkt_extr_trail(pkt, &tmp, 2); - hdrchks = le16_to_cpu(tmp); - pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); - if (pktchks != hdrchks) { - cfpkt_add_trail(pkt, &tmp, 2); - ++cffrml_rcv_error; - ++cffrml_rcv_checsum_error; - pr_info("Frame checksum error (0x%x != 0x%x)\n", - hdrchks, pktchks); - return -EILSEQ; - } - } - if (cfpkt_erroneous(pkt)) { - ++cffrml_rcv_error; - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - - if (layr->up == NULL) { - pr_err("Layr up is missing!\n"); - cfpkt_destroy(pkt); - return -EINVAL; - } - - return layr->up->receive(layr->up, pkt); -} - -static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - u16 chks; - u16 len; - __le16 data; - - struct cffrml *this = container_obj(layr); - if (this->dofcs) { - chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); - data = cpu_to_le16(chks); - cfpkt_add_trail(pkt, &data, 2); - } else { - cfpkt_pad_trail(pkt, 2); - } - len = cfpkt_getlen(pkt); - data = cpu_to_le16(len); - cfpkt_add_head(pkt, &data, 2); - cfpkt_info(pkt)->hdr_len += 2; - if (cfpkt_erroneous(pkt)) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - - if (layr->dn == NULL) { - cfpkt_destroy(pkt); - return -ENODEV; - - } - return layr->dn->transmit(layr->dn, pkt); -} - -static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - if (layr->up && layr->up->ctrlcmd) - layr->up->ctrlcmd(layr->up, ctrl, layr->id); -} - -void cffrml_put(struct cflayer *layr) -{ - struct cffrml *this = container_obj(layr); - if (layr != NULL && this->pcpu_refcnt != NULL) - this_cpu_dec(*this->pcpu_refcnt); -} - -void cffrml_hold(struct cflayer *layr) -{ - struct cffrml *this = container_obj(layr); - if (layr != NULL && this->pcpu_refcnt != NULL) - this_cpu_inc(*this->pcpu_refcnt); -} - -int cffrml_refcnt_read(struct cflayer *layr) -{ - int i, refcnt = 0; - struct cffrml *this = container_obj(layr); - 
for_each_possible_cpu(i) - refcnt += *per_cpu_ptr(this->pcpu_refcnt, i); - return refcnt; -} diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c deleted file mode 100644 index 77a1f31639b7..000000000000 --- a/net/caif/cfmuxl.c +++ /dev/null @@ -1,267 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/rculist.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfmuxl.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cffrml.h> - -#define container_obj(layr) container_of(layr, struct cfmuxl, layer) - -#define CAIF_CTRL_CHANNEL 0 -#define UP_CACHE_SIZE 8 -#define DN_CACHE_SIZE 8 - -struct cfmuxl { - struct cflayer layer; - struct list_head srvl_list; - struct list_head frml_list; - struct cflayer *up_cache[UP_CACHE_SIZE]; - struct cflayer *dn_cache[DN_CACHE_SIZE]; - /* - * Set when inserting or removing downwards layers. - */ - spinlock_t transmit_lock; - - /* - * Set when inserting or removing upwards layers. 
- */ - spinlock_t receive_lock; - -}; - -static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); -static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); -static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); - -struct cflayer *cfmuxl_create(void) -{ - struct cfmuxl *this = kzalloc_obj(struct cfmuxl, GFP_ATOMIC); - - if (!this) - return NULL; - this->layer.receive = cfmuxl_receive; - this->layer.transmit = cfmuxl_transmit; - this->layer.ctrlcmd = cfmuxl_ctrlcmd; - INIT_LIST_HEAD(&this->srvl_list); - INIT_LIST_HEAD(&this->frml_list); - spin_lock_init(&this->transmit_lock); - spin_lock_init(&this->receive_lock); - snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux"); - return &this->layer; -} - -int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid) -{ - struct cfmuxl *muxl = (struct cfmuxl *) layr; - - spin_lock_bh(&muxl->transmit_lock); - list_add_rcu(&dn->node, &muxl->frml_list); - spin_unlock_bh(&muxl->transmit_lock); - return 0; -} - -static struct cflayer *get_from_id(struct list_head *list, u16 id) -{ - struct cflayer *lyr; - list_for_each_entry_rcu(lyr, list, node) { - if (lyr->id == id) - return lyr; - } - - return NULL; -} - -int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) -{ - struct cfmuxl *muxl = container_obj(layr); - struct cflayer *old; - - spin_lock_bh(&muxl->receive_lock); - - /* Two entries with same id is wrong, so remove old layer from mux */ - old = get_from_id(&muxl->srvl_list, linkid); - if (old != NULL) - list_del_rcu(&old->node); - - list_add_rcu(&up->node, &muxl->srvl_list); - spin_unlock_bh(&muxl->receive_lock); - - return 0; -} - -struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid) -{ - struct cfmuxl *muxl = container_obj(layr); - struct cflayer *dn; - int idx = phyid % DN_CACHE_SIZE; - - spin_lock_bh(&muxl->transmit_lock); - 
RCU_INIT_POINTER(muxl->dn_cache[idx], NULL); - dn = get_from_id(&muxl->frml_list, phyid); - if (dn == NULL) - goto out; - - list_del_rcu(&dn->node); - caif_assert(dn != NULL); -out: - spin_unlock_bh(&muxl->transmit_lock); - return dn; -} - -static struct cflayer *get_up(struct cfmuxl *muxl, u16 id) -{ - struct cflayer *up; - int idx = id % UP_CACHE_SIZE; - up = rcu_dereference(muxl->up_cache[idx]); - if (up == NULL || up->id != id) { - spin_lock_bh(&muxl->receive_lock); - up = get_from_id(&muxl->srvl_list, id); - rcu_assign_pointer(muxl->up_cache[idx], up); - spin_unlock_bh(&muxl->receive_lock); - } - return up; -} - -static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info) -{ - struct cflayer *dn; - int idx = dev_info->id % DN_CACHE_SIZE; - dn = rcu_dereference(muxl->dn_cache[idx]); - if (dn == NULL || dn->id != dev_info->id) { - spin_lock_bh(&muxl->transmit_lock); - dn = get_from_id(&muxl->frml_list, dev_info->id); - rcu_assign_pointer(muxl->dn_cache[idx], dn); - spin_unlock_bh(&muxl->transmit_lock); - } - return dn; -} - -struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) -{ - struct cflayer *up; - struct cfmuxl *muxl = container_obj(layr); - int idx = id % UP_CACHE_SIZE; - - if (id == 0) { - pr_warn("Trying to remove control layer\n"); - return NULL; - } - - spin_lock_bh(&muxl->receive_lock); - up = get_from_id(&muxl->srvl_list, id); - if (up == NULL) - goto out; - - RCU_INIT_POINTER(muxl->up_cache[idx], NULL); - list_del_rcu(&up->node); -out: - spin_unlock_bh(&muxl->receive_lock); - return up; -} - -static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - int ret; - struct cfmuxl *muxl = container_obj(layr); - u8 id; - struct cflayer *up; - if (cfpkt_extr_head(pkt, &id, 1) < 0) { - pr_err("erroneous Caif Packet\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - rcu_read_lock(); - up = get_up(muxl, id); - - if (up == NULL) { - pr_debug("Received data on unknown link ID = %d (0x%x)" - " up == NULL", id, id); - 
cfpkt_destroy(pkt); - /* - * Don't return ERROR, since modem misbehaves and sends out - * flow on before linksetup response. - */ - - rcu_read_unlock(); - return /* CFGLU_EPROT; */ 0; - } - - /* We can't hold rcu_lock during receive, so take a ref count instead */ - cfsrvl_get(up); - rcu_read_unlock(); - - ret = up->receive(up, pkt); - - cfsrvl_put(up); - return ret; -} - -static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - struct cfmuxl *muxl = container_obj(layr); - int err; - u8 linkid; - struct cflayer *dn; - struct caif_payload_info *info = cfpkt_info(pkt); - BUG_ON(!info); - - rcu_read_lock(); - - dn = get_dn(muxl, info->dev_info); - if (dn == NULL) { - pr_debug("Send data on unknown phy ID = %d (0x%x)\n", - info->dev_info->id, info->dev_info->id); - rcu_read_unlock(); - cfpkt_destroy(pkt); - return -ENOTCONN; - } - - info->hdr_len += 1; - linkid = info->channel_id; - cfpkt_add_head(pkt, &linkid, 1); - - /* We can't hold rcu_lock during receive, so take a ref count instead */ - cffrml_hold(dn); - - rcu_read_unlock(); - - err = dn->transmit(dn, pkt); - - cffrml_put(dn); - return err; -} - -static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - struct cfmuxl *muxl = container_obj(layr); - struct cflayer *layer; - - rcu_read_lock(); - list_for_each_entry_rcu(layer, &muxl->srvl_list, node) { - - if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - - if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || - ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && - layer->id != 0) - cfmuxl_remove_uplayer(layr, layer->id); - - /* NOTE: ctrlcmd is not allowed to block */ - layer->ctrlcmd(layer, ctrl, phyid); - } - } - rcu_read_unlock(); -} diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c deleted file mode 100644 index 96236d21b18e..000000000000 --- a/net/caif/cfpkt_skbuff.c +++ /dev/null @@ -1,373 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur 
Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/export.h> -#include <net/caif/cfpkt.h> - -#define PKT_PREFIX 48 -#define PKT_POSTFIX 2 -#define PKT_LEN_WHEN_EXTENDING 128 -#define PKT_ERROR(pkt, errmsg) \ -do { \ - cfpkt_priv(pkt)->erronous = true; \ - skb_reset_tail_pointer(&pkt->skb); \ - pr_warn(errmsg); \ -} while (0) - -/* - * net/caif/ is generic and does not - * understand SKB, so we do this typecast - */ -struct cfpkt { - struct sk_buff skb; -}; - -/* Private data inside SKB */ -struct cfpkt_priv_data { - struct dev_info dev_info; - bool erronous; -}; - -static inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt) -{ - return (struct cfpkt_priv_data *) pkt->skb.cb; -} - -static inline bool is_erronous(struct cfpkt *pkt) -{ - return cfpkt_priv(pkt)->erronous; -} - -static inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt) -{ - return &pkt->skb; -} - -static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) -{ - return (struct cfpkt *) skb; -} - -struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt) -{ - struct cfpkt *pkt = skb_to_pkt(nativepkt); - cfpkt_priv(pkt)->erronous = false; - return pkt; -} -EXPORT_SYMBOL(cfpkt_fromnative); - -void *cfpkt_tonative(struct cfpkt *pkt) -{ - return (void *) pkt; -} -EXPORT_SYMBOL(cfpkt_tonative); - -static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx) -{ - struct sk_buff *skb; - - skb = alloc_skb(len + pfx, GFP_ATOMIC); - if (unlikely(skb == NULL)) - return NULL; - - skb_reserve(skb, pfx); - return skb_to_pkt(skb); -} - -inline struct cfpkt *cfpkt_create(u16 len) -{ - return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); -} - -void cfpkt_destroy(struct cfpkt *pkt) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - kfree_skb(skb); -} - -inline bool cfpkt_more(struct cfpkt *pkt) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - return skb->len > 0; -} - -int cfpkt_peek_head(struct cfpkt *pkt, 
void *data, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - if (skb_headlen(skb) >= len) { - memcpy(data, skb->data, len); - return 0; - } - return !cfpkt_extr_head(pkt, data, len) && - !cfpkt_add_head(pkt, data, len); -} - -int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - u8 *from; - if (unlikely(is_erronous(pkt))) - return -EPROTO; - - if (unlikely(len > skb->len)) { - PKT_ERROR(pkt, "read beyond end of packet\n"); - return -EPROTO; - } - - if (unlikely(len > skb_headlen(skb))) { - if (unlikely(skb_linearize(skb) != 0)) { - PKT_ERROR(pkt, "linearize failed\n"); - return -EPROTO; - } - } - from = skb_pull(skb, len); - from -= len; - if (data) - memcpy(data, from, len); - return 0; -} -EXPORT_SYMBOL(cfpkt_extr_head); - -int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - u8 *data = dta; - u8 *from; - if (unlikely(is_erronous(pkt))) - return -EPROTO; - - if (unlikely(skb_linearize(skb) != 0)) { - PKT_ERROR(pkt, "linearize failed\n"); - return -EPROTO; - } - if (unlikely(skb->data + len > skb_tail_pointer(skb))) { - PKT_ERROR(pkt, "read beyond end of packet\n"); - return -EPROTO; - } - from = skb_tail_pointer(skb) - len; - skb_trim(skb, skb->len - len); - memcpy(data, from, len); - return 0; -} - -int cfpkt_pad_trail(struct cfpkt *pkt, u16 len) -{ - return cfpkt_add_body(pkt, NULL, len); -} - -int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - struct sk_buff *lastskb; - u8 *to; - u16 addlen = 0; - - - if (unlikely(is_erronous(pkt))) - return -EPROTO; - - lastskb = skb; - - /* Check whether we need to add space at the tail */ - if (unlikely(skb_tailroom(skb) < len)) { - if (likely(len < PKT_LEN_WHEN_EXTENDING)) - addlen = PKT_LEN_WHEN_EXTENDING; - else - addlen = len; - } - - /* Check whether we need to change the SKB before writing to the tail */ - if (unlikely((addlen > 0) || skb_cloned(skb) 
|| skb_shared(skb))) { - - /* Make sure data is writable */ - if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { - PKT_ERROR(pkt, "cow failed\n"); - return -EPROTO; - } - } - - /* All set to put the last SKB and optionally write data there. */ - to = pskb_put(skb, lastskb, len); - if (likely(data)) - memcpy(to, data, len); - return 0; -} - -inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data) -{ - return cfpkt_add_body(pkt, &data, 1); -} - -int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - struct sk_buff *lastskb; - u8 *to; - const u8 *data = data2; - int ret; - if (unlikely(is_erronous(pkt))) - return -EPROTO; - if (unlikely(skb_headroom(skb) < len)) { - PKT_ERROR(pkt, "no headroom\n"); - return -EPROTO; - } - - /* Make sure data is writable */ - ret = skb_cow_data(skb, 0, &lastskb); - if (unlikely(ret < 0)) { - PKT_ERROR(pkt, "cow failed\n"); - return ret; - } - - to = skb_push(skb, len); - memcpy(to, data, len); - return 0; -} -EXPORT_SYMBOL(cfpkt_add_head); - -inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len) -{ - return cfpkt_add_body(pkt, data, len); -} - -inline u16 cfpkt_getlen(struct cfpkt *pkt) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - return skb->len; -} - -int cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) -{ - /* - * Don't care about the performance hit of linearizing, - * Checksum should not be used on high-speed interfaces anyway. 
- */ - if (unlikely(is_erronous(pkt))) - return -EPROTO; - if (unlikely(skb_linearize(&pkt->skb) != 0)) { - PKT_ERROR(pkt, "linearize failed\n"); - return -EPROTO; - } - return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); -} - -int cfpkt_setlen(struct cfpkt *pkt, u16 len) -{ - struct sk_buff *skb = pkt_to_skb(pkt); - - - if (unlikely(is_erronous(pkt))) - return -EPROTO; - - if (likely(len <= skb->len)) { - if (unlikely(skb->data_len)) - ___pskb_trim(skb, len); - else - skb_trim(skb, len); - - return cfpkt_getlen(pkt); - } - - /* Need to expand SKB */ - if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) - PKT_ERROR(pkt, "skb_pad_trail failed\n"); - - return cfpkt_getlen(pkt); -} - -struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, - struct cfpkt *addpkt, - u16 expectlen) -{ - struct sk_buff *dst = pkt_to_skb(dstpkt); - struct sk_buff *add = pkt_to_skb(addpkt); - u16 addlen = skb_headlen(add); - u16 neededtailspace; - struct sk_buff *tmp; - u16 dstlen; - u16 createlen; - if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) { - return dstpkt; - } - - neededtailspace = max(expectlen, addlen); - - if (dst->tail + neededtailspace > dst->end) { - /* Create a dumplicate of 'dst' with more tail space */ - struct cfpkt *tmppkt; - dstlen = skb_headlen(dst); - createlen = dstlen + neededtailspace; - tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX); - if (tmppkt == NULL) - return NULL; - tmp = pkt_to_skb(tmppkt); - skb_put_data(tmp, dst->data, dstlen); - cfpkt_destroy(dstpkt); - dst = tmp; - } - skb_put_data(dst, add->data, skb_headlen(add)); - cfpkt_destroy(addpkt); - return skb_to_pkt(dst); -} - -struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) -{ - struct sk_buff *skb2; - struct sk_buff *skb = pkt_to_skb(pkt); - struct cfpkt *tmppkt; - u8 *split = skb->data + pos; - u16 len2nd = skb_tail_pointer(skb) - split; - - if (unlikely(is_erronous(pkt))) - return NULL; - - if (skb->data + pos > skb_tail_pointer(skb)) { - PKT_ERROR(pkt, "trying to split 
beyond end of packet\n"); - return NULL; - } - - /* Create a new packet for the second part of the data */ - tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, - PKT_PREFIX); - if (tmppkt == NULL) - return NULL; - skb2 = pkt_to_skb(tmppkt); - - - if (skb2 == NULL) - return NULL; - - skb_put_data(skb2, split, len2nd); - - /* Reduce the length of the original packet */ - skb_trim(skb, pos); - - skb2->priority = skb->priority; - return skb_to_pkt(skb2); -} - -bool cfpkt_erroneous(struct cfpkt *pkt) -{ - return cfpkt_priv(pkt)->erronous; -} - -struct caif_payload_info *cfpkt_info(struct cfpkt *pkt) -{ - return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb; -} -EXPORT_SYMBOL(cfpkt_info); - -void cfpkt_set_prio(struct cfpkt *pkt, int prio) -{ - pkt_to_skb(pkt)->priority = prio; -} -EXPORT_SYMBOL(cfpkt_set_prio); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c deleted file mode 100644 index 93732ebbd1e2..000000000000 --- a/net/caif/cfrfml.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/unaligned.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - -#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer) -#define RFM_SEGMENTATION_BIT 0x01 -#define RFM_HEAD_SIZE 7 - -static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cfrfml { - struct cfsrvl serv; - struct cfpkt *incomplete_frm; - int fragment_size; - u8 seghead[6]; - u16 pdu_size; - /* Protects serialized processing of packets */ - spinlock_t sync; -}; - -static void cfrfml_release(struct cflayer *layer) -{ - struct cfsrvl *srvl = container_of(layer, struct cfsrvl, layer); - struct cfrfml 
*rfml = container_obj(&srvl->layer); - - if (rfml->incomplete_frm) - cfpkt_destroy(rfml->incomplete_frm); - - kfree(srvl); -} - -struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, - int mtu_size) -{ - int tmp; - struct cfrfml *this = kzalloc_obj(struct cfrfml, GFP_ATOMIC); - - if (!this) - return NULL; - - cfsrvl_init(&this->serv, channel_id, dev_info, false); - this->serv.release = cfrfml_release; - this->serv.layer.receive = cfrfml_receive; - this->serv.layer.transmit = cfrfml_transmit; - - /* Round down to closest multiple of 16 */ - tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16; - tmp *= 16; - - this->fragment_size = tmp; - spin_lock_init(&this->sync); - snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ, - "rfm%d", channel_id); - - return &this->serv.layer; -} - -static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, - struct cfpkt *pkt, int *err) -{ - struct cfpkt *tmppkt; - *err = -EPROTO; - /* n-th but not last segment */ - - if (cfpkt_extr_head(pkt, seghead, 6) < 0) - return NULL; - - /* Verify correct header */ - if (memcmp(seghead, rfml->seghead, 6) != 0) - return NULL; - - tmppkt = cfpkt_append(rfml->incomplete_frm, pkt, - rfml->pdu_size + RFM_HEAD_SIZE); - - /* If cfpkt_append failes input pkts are not freed */ - *err = -ENOMEM; - if (tmppkt == NULL) - return NULL; - - *err = 0; - return tmppkt; -} - -static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 tmp; - bool segmented; - int err; - u8 seghead[6]; - struct cfrfml *rfml; - struct cfpkt *tmppkt = NULL; - - caif_assert(layr->up != NULL); - caif_assert(layr->receive != NULL); - rfml = container_obj(layr); - spin_lock(&rfml->sync); - - err = -EPROTO; - if (cfpkt_extr_head(pkt, &tmp, 1) < 0) - goto out; - segmented = tmp & RFM_SEGMENTATION_BIT; - - if (segmented) { - if (rfml->incomplete_frm == NULL) { - /* Initial Segment */ - if (cfpkt_peek_head(pkt, rfml->seghead, 6) != 0) - goto out; - - rfml->pdu_size = get_unaligned_le16(rfml->seghead+4); - - if 
(cfpkt_erroneous(pkt)) - goto out; - rfml->incomplete_frm = pkt; - pkt = NULL; - } else { - - tmppkt = rfm_append(rfml, seghead, pkt, &err); - if (tmppkt == NULL) - goto out; - - if (cfpkt_erroneous(tmppkt)) - goto out; - - rfml->incomplete_frm = tmppkt; - - - if (cfpkt_erroneous(tmppkt)) - goto out; - } - err = 0; - goto out; - } - - if (rfml->incomplete_frm) { - - /* Last Segment */ - tmppkt = rfm_append(rfml, seghead, pkt, &err); - if (tmppkt == NULL) - goto out; - - if (cfpkt_erroneous(tmppkt)) - goto out; - - rfml->incomplete_frm = NULL; - pkt = tmppkt; - tmppkt = NULL; - - /* Verify that length is correct */ - err = -EPROTO; - if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1) - goto out; - } - - err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt); - -out: - - if (err != 0) { - if (tmppkt) - cfpkt_destroy(tmppkt); - if (pkt) - cfpkt_destroy(pkt); - if (rfml->incomplete_frm) - cfpkt_destroy(rfml->incomplete_frm); - rfml->incomplete_frm = NULL; - - pr_info("Connection error %d triggered on RFM link\n", err); - - /* Trigger connection error upon failure.*/ - layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, - rfml->serv.dev_info.id); - } - spin_unlock(&rfml->sync); - - if (unlikely(err == -EAGAIN)) - /* It is not possible to recover after drop of a fragment */ - err = -EIO; - - return err; -} - - -static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) -{ - caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size + RFM_HEAD_SIZE); - - /* Add info for MUX-layer to route the packet out. */ - cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; - - /* - * To optimize alignment, we add up the size of CAIF header before - * payload. 
- */ - cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE; - cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info; - - return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt); -} - -static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - int err; - u8 seg; - u8 head[6]; - struct cfpkt *rearpkt = NULL; - struct cfpkt *frontpkt = pkt; - struct cfrfml *rfml = container_obj(layr); - - caif_assert(layr->dn != NULL); - caif_assert(layr->dn->transmit != NULL); - - if (!cfsrvl_ready(&rfml->serv, &err)) - goto out; - - err = -EPROTO; - if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1) - goto out; - - err = 0; - if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE) - err = cfpkt_peek_head(pkt, head, 6); - - if (err != 0) - goto out; - - while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) { - - seg = 1; - err = -EPROTO; - - if (cfpkt_add_head(frontpkt, &seg, 1) < 0) - goto out; - /* - * On OOM error cfpkt_split returns NULL. - * - * NOTE: Segmented pdu is not correctly aligned. - * This has negative performance impact. 
- */ - - rearpkt = cfpkt_split(frontpkt, rfml->fragment_size); - if (rearpkt == NULL) - goto out; - - err = cfrfml_transmit_segment(rfml, frontpkt); - - if (err != 0) { - frontpkt = NULL; - goto out; - } - - frontpkt = rearpkt; - rearpkt = NULL; - - err = -EPROTO; - if (cfpkt_add_head(frontpkt, head, 6) < 0) - goto out; - - } - - seg = 0; - err = -EPROTO; - - if (cfpkt_add_head(frontpkt, &seg, 1) < 0) - goto out; - - err = cfrfml_transmit_segment(rfml, frontpkt); - - frontpkt = NULL; -out: - - if (err != 0) { - pr_info("Connection error %d triggered on RFM link\n", err); - /* Trigger connection error upon failure.*/ - - layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, - rfml->serv.dev_info.id); - - if (rearpkt) - cfpkt_destroy(rearpkt); - - if (frontpkt) - cfpkt_destroy(frontpkt); - } - - return err; -} diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c deleted file mode 100644 index faf78fb754e2..000000000000 --- a/net/caif/cfserl.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/cfserl.h> - -#define container_obj(layr) ((struct cfserl *) layr) - -#define CFSERL_STX 0x02 -#define SERIAL_MINIUM_PACKET_SIZE 4 -#define SERIAL_MAX_FRAMESIZE 4096 -struct cfserl { - struct cflayer layer; - struct cfpkt *incomplete_frm; - /* Protects parallel processing of incoming packets */ - spinlock_t sync; - bool usestx; -}; - -static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); -static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); - -void cfserl_release(struct cflayer *layer) -{ - kfree(layer); -} - -struct cflayer 
*cfserl_create(int instance, bool use_stx) -{ - struct cfserl *this = kzalloc_obj(struct cfserl, GFP_ATOMIC); - if (!this) - return NULL; - caif_assert(offsetof(struct cfserl, layer) == 0); - this->layer.receive = cfserl_receive; - this->layer.transmit = cfserl_transmit; - this->layer.ctrlcmd = cfserl_ctrlcmd; - this->usestx = use_stx; - spin_lock_init(&this->sync); - snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1"); - return &this->layer; -} - -static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) -{ - struct cfserl *layr = container_obj(l); - u16 pkt_len; - struct cfpkt *pkt = NULL; - struct cfpkt *tail_pkt = NULL; - u8 tmp8; - u16 tmp; - u8 stx = CFSERL_STX; - int ret; - u16 expectlen = 0; - - caif_assert(newpkt != NULL); - spin_lock(&layr->sync); - - if (layr->incomplete_frm != NULL) { - layr->incomplete_frm = - cfpkt_append(layr->incomplete_frm, newpkt, expectlen); - pkt = layr->incomplete_frm; - if (pkt == NULL) { - spin_unlock(&layr->sync); - return -ENOMEM; - } - } else { - pkt = newpkt; - } - layr->incomplete_frm = NULL; - - do { - /* Search for STX at start of pkt if STX is used */ - if (layr->usestx) { - cfpkt_extr_head(pkt, &tmp8, 1); - if (tmp8 != CFSERL_STX) { - while (cfpkt_more(pkt) - && tmp8 != CFSERL_STX) { - cfpkt_extr_head(pkt, &tmp8, 1); - } - if (!cfpkt_more(pkt)) { - cfpkt_destroy(pkt); - layr->incomplete_frm = NULL; - spin_unlock(&layr->sync); - return -EPROTO; - } - } - } - - pkt_len = cfpkt_getlen(pkt); - - /* - * pkt_len is the accumulated length of the packet data - * we have received so far. - * Exit if frame doesn't hold length. - */ - - if (pkt_len < 2) { - if (layr->usestx) - cfpkt_add_head(pkt, &stx, 1); - layr->incomplete_frm = pkt; - spin_unlock(&layr->sync); - return 0; - } - - /* - * Find length of frame. - * expectlen is the length we need for a full frame. 
- */ - cfpkt_peek_head(pkt, &tmp, 2); - expectlen = le16_to_cpu(tmp) + 2; - /* - * Frame error handling - */ - if (expectlen < SERIAL_MINIUM_PACKET_SIZE - || expectlen > SERIAL_MAX_FRAMESIZE) { - if (!layr->usestx) { - if (pkt != NULL) - cfpkt_destroy(pkt); - layr->incomplete_frm = NULL; - spin_unlock(&layr->sync); - return -EPROTO; - } - continue; - } - - if (pkt_len < expectlen) { - /* Too little received data */ - if (layr->usestx) - cfpkt_add_head(pkt, &stx, 1); - layr->incomplete_frm = pkt; - spin_unlock(&layr->sync); - return 0; - } - - /* - * Enough data for at least one frame. - * Split the frame, if too long - */ - if (pkt_len > expectlen) - tail_pkt = cfpkt_split(pkt, expectlen); - else - tail_pkt = NULL; - - /* Send the first part of packet upwards.*/ - spin_unlock(&layr->sync); - ret = layr->layer.up->receive(layr->layer.up, pkt); - spin_lock(&layr->sync); - if (ret == -EILSEQ) { - if (layr->usestx) { - if (tail_pkt != NULL) - pkt = cfpkt_append(pkt, tail_pkt, 0); - /* Start search for next STX if frame failed */ - continue; - } else { - cfpkt_destroy(pkt); - pkt = NULL; - } - } - - pkt = tail_pkt; - - } while (pkt != NULL); - - spin_unlock(&layr->sync); - return 0; -} - -static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) -{ - struct cfserl *layr = container_obj(layer); - u8 tmp8 = CFSERL_STX; - if (layr->usestx) - cfpkt_add_head(newpkt, &tmp8, 1); - return layer->dn->transmit(layer->dn, newpkt); -} - -static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - layr->up->ctrlcmd(layr->up, ctrl, phyid); -} diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c deleted file mode 100644 index d687fd0b4ed3..000000000000 --- a/net/caif/cfsrvl.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/types.h> 
-#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/pkt_sched.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> -#include <net/caif/caif_dev.h> - -#define SRVL_CTRL_PKT_SIZE 1 -#define SRVL_FLOW_OFF 0x81 -#define SRVL_FLOW_ON 0x80 -#define SRVL_SET_PIN 0x82 - -#define container_obj(layr) container_of(layr, struct cfsrvl, layer) - -static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) -{ - struct cfsrvl *service = container_obj(layr); - - if (layr->up == NULL || layr->up->ctrlcmd == NULL) - return; - - switch (ctrl) { - case CAIF_CTRLCMD_INIT_RSP: - service->open = true; - layr->up->ctrlcmd(layr->up, ctrl, phyid); - break; - case CAIF_CTRLCMD_DEINIT_RSP: - case CAIF_CTRLCMD_INIT_FAIL_RSP: - service->open = false; - layr->up->ctrlcmd(layr->up, ctrl, phyid); - break; - case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: - if (phyid != service->dev_info.id) - break; - if (service->modem_flow_on) - layr->up->ctrlcmd(layr->up, - CAIF_CTRLCMD_FLOW_OFF_IND, phyid); - service->phy_flow_on = false; - break; - case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND: - if (phyid != service->dev_info.id) - return; - if (service->modem_flow_on) { - layr->up->ctrlcmd(layr->up, - CAIF_CTRLCMD_FLOW_ON_IND, - phyid); - } - service->phy_flow_on = true; - break; - case CAIF_CTRLCMD_FLOW_OFF_IND: - if (service->phy_flow_on) { - layr->up->ctrlcmd(layr->up, - CAIF_CTRLCMD_FLOW_OFF_IND, phyid); - } - service->modem_flow_on = false; - break; - case CAIF_CTRLCMD_FLOW_ON_IND: - if (service->phy_flow_on) { - layr->up->ctrlcmd(layr->up, - CAIF_CTRLCMD_FLOW_ON_IND, phyid); - } - service->modem_flow_on = true; - break; - case _CAIF_CTRLCMD_PHYIF_DOWN_IND: - /* In case interface is down, let's fake a remove shutdown */ - layr->up->ctrlcmd(layr->up, - CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid); - break; - case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: - layr->up->ctrlcmd(layr->up, ctrl, phyid); - break; - default: - 
pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl); - /* We have both modem and phy flow on, send flow on */ - layr->up->ctrlcmd(layr->up, ctrl, phyid); - service->phy_flow_on = true; - break; - } -} - -static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) -{ - struct cfsrvl *service = container_obj(layr); - - caif_assert(layr != NULL); - caif_assert(layr->dn != NULL); - caif_assert(layr->dn->transmit != NULL); - - if (!service->supports_flowctrl) - return 0; - - switch (ctrl) { - case CAIF_MODEMCMD_FLOW_ON_REQ: - { - struct cfpkt *pkt; - struct caif_payload_info *info; - u8 flow_on = SRVL_FLOW_ON; - pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) - return -ENOMEM; - - if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - info->hdr_len = 1; - info->dev_info = &service->dev_info; - cfpkt_set_prio(pkt, TC_PRIO_CONTROL); - return layr->dn->transmit(layr->dn, pkt); - } - case CAIF_MODEMCMD_FLOW_OFF_REQ: - { - struct cfpkt *pkt; - struct caif_payload_info *info; - u8 flow_off = SRVL_FLOW_OFF; - pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) - return -ENOMEM; - - if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - info->hdr_len = 1; - info->dev_info = &service->dev_info; - cfpkt_set_prio(pkt, TC_PRIO_CONTROL); - return layr->dn->transmit(layr->dn, pkt); - } - default: - break; - } - return -EINVAL; -} - -static void cfsrvl_release(struct cflayer *layer) -{ - struct cfsrvl *service = container_of(layer, struct cfsrvl, layer); - kfree(service); -} - -void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info, - bool supports_flowctrl) -{ - caif_assert(offsetof(struct cfsrvl, layer) == 0); - service->open = false; - service->modem_flow_on = 
true; - service->phy_flow_on = true; - service->layer.id = channel_id; - service->layer.ctrlcmd = cfservl_ctrlcmd; - service->layer.modemcmd = cfservl_modemcmd; - service->dev_info = *dev_info; - service->supports_flowctrl = supports_flowctrl; - service->release = cfsrvl_release; -} - -bool cfsrvl_ready(struct cfsrvl *service, int *err) -{ - if (!service->open) { - *err = -ENOTCONN; - return false; - } - return true; -} - -bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) -{ - struct cfsrvl *servl = container_obj(layer); - return servl->dev_info.id == phyid; -} - -void caif_free_client(struct cflayer *adap_layer) -{ - struct cflayer *serv_layer; - struct cfsrvl *servl; - - if (!adap_layer) - return; - - serv_layer = adap_layer->dn; - if (!serv_layer) - return; - - layer_set_dn(adap_layer, NULL); - layer_set_up(serv_layer, NULL); - - servl = container_obj(serv_layer); - servl->release(&servl->layer); -} -EXPORT_SYMBOL(caif_free_client); - -void caif_client_register_refcnt(struct cflayer *adapt_layer, - void (*hold)(struct cflayer *lyr), - void (*put)(struct cflayer *lyr)) -{ - struct cfsrvl *service; - - if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL)) - return; - service = container_of(adapt_layer->dn, struct cfsrvl, layer); - service->hold = hold; - service->put = put; -} -EXPORT_SYMBOL(caif_client_register_refcnt); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c deleted file mode 100644 index 5111090bb2c0..000000000000 --- a/net/caif/cfutill.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - -#define container_obj(layr) ((struct cfsrvl *) layr) -#define UTIL_PAYLOAD 0x00 -#define 
UTIL_CMD_BIT 0x80 -#define UTIL_REMOTE_SHUTDOWN 0x82 -#define UTIL_FLOW_OFF 0x81 -#define UTIL_FLOW_ON 0x80 - -static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) -{ - struct cfsrvl *util = kzalloc_obj(struct cfsrvl, GFP_ATOMIC); - if (!util) - return NULL; - caif_assert(offsetof(struct cfsrvl, layer) == 0); - cfsrvl_init(util, channel_id, dev_info, true); - util->layer.receive = cfutill_receive; - util->layer.transmit = cfutill_transmit; - snprintf(util->layer.name, CAIF_LAYER_NAME_SZ, "util1"); - return &util->layer; -} - -static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 cmd = -1; - struct cfsrvl *service = container_obj(layr); - caif_assert(layr != NULL); - caif_assert(layr->up != NULL); - caif_assert(layr->up->receive != NULL); - caif_assert(layr->up->ctrlcmd != NULL); - if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - - switch (cmd) { - case UTIL_PAYLOAD: - return layr->up->receive(layr->up, pkt); - case UTIL_FLOW_OFF: - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); - cfpkt_destroy(pkt); - return 0; - case UTIL_FLOW_ON: - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); - cfpkt_destroy(pkt); - return 0; - case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ - pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n"); - layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); - service->open = false; - cfpkt_destroy(pkt); - return 0; - default: - cfpkt_destroy(pkt); - pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd); - return -EPROTO; - } -} - -static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 zero = 0; - struct caif_payload_info *info; - int ret; - struct cfsrvl *service = container_obj(layr); - caif_assert(layr != NULL); - caif_assert(layr->dn != NULL); - 
caif_assert(layr->dn->transmit != NULL); - - if (!cfsrvl_ready(service, &ret)) { - cfpkt_destroy(pkt); - return ret; - } - - cfpkt_add_head(pkt, &zero, 1); - /* Add info for MUX-layer to route the packet out. */ - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - /* - * To optimize alignment, we add up the size of CAIF header before - * payload. - */ - info->hdr_len = 1; - info->dev_info = &service->dev_info; - return layr->dn->transmit(layr->dn, pkt); -} diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c deleted file mode 100644 index 53f844c49bbb..000000000000 --- a/net/caif/cfveil.c +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/stddef.h> -#include <linux/slab.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - -#define VEI_PAYLOAD 0x00 -#define VEI_CMD_BIT 0x80 -#define VEI_FLOW_OFF 0x81 -#define VEI_FLOW_ON 0x80 -#define VEI_SET_PIN 0x82 - -#define container_obj(layr) container_of(layr, struct cfsrvl, layer) - -static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) -{ - struct cfsrvl *vei = kzalloc_obj(struct cfsrvl, GFP_ATOMIC); - if (!vei) - return NULL; - caif_assert(offsetof(struct cfsrvl, layer) == 0); - cfsrvl_init(vei, channel_id, dev_info, true); - vei->layer.receive = cfvei_receive; - vei->layer.transmit = cfvei_transmit; - snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ, "vei%d", channel_id); - return &vei->layer; -} - -static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 cmd; - int ret; - caif_assert(layr->up != NULL); - caif_assert(layr->receive != NULL); - caif_assert(layr->ctrlcmd != NULL); - - - if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 
- pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - switch (cmd) { - case VEI_PAYLOAD: - ret = layr->up->receive(layr->up, pkt); - return ret; - case VEI_FLOW_OFF: - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0); - cfpkt_destroy(pkt); - return 0; - case VEI_FLOW_ON: - layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0); - cfpkt_destroy(pkt); - return 0; - case VEI_SET_PIN: /* SET RS232 PIN */ - cfpkt_destroy(pkt); - return 0; - default: /* SET RS232 PIN */ - pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd); - cfpkt_destroy(pkt); - return -EPROTO; - } -} - -static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - u8 tmp = 0; - struct caif_payload_info *info; - int ret; - struct cfsrvl *service = container_obj(layr); - if (!cfsrvl_ready(service, &ret)) - goto err; - caif_assert(layr->dn != NULL); - caif_assert(layr->dn->transmit != NULL); - - if (cfpkt_add_head(pkt, &tmp, 1) < 0) { - pr_err("Packet is erroneous!\n"); - ret = -EPROTO; - goto err; - } - - /* Add info-> for MUX-layer to route the packet out. 
*/ - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - info->hdr_len = 1; - info->dev_info = &service->dev_info; - return layr->dn->transmit(layr->dn, pkt); -err: - cfpkt_destroy(pkt); - return ret; -} diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c deleted file mode 100644 index 39e075b0a259..000000000000 --- a/net/caif/cfvidl.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfsrvl.h> -#include <net/caif/cfpkt.h> - -#define container_obj(layr) ((struct cfsrvl *) layr) - -static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt); -static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt); - -struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) -{ - struct cfsrvl *vid = kzalloc_obj(struct cfsrvl, GFP_ATOMIC); - if (!vid) - return NULL; - caif_assert(offsetof(struct cfsrvl, layer) == 0); - - cfsrvl_init(vid, channel_id, dev_info, false); - vid->layer.receive = cfvidl_receive; - vid->layer.transmit = cfvidl_transmit; - snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ, "vid1"); - return &vid->layer; -} - -static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt) -{ - u32 videoheader; - if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { - pr_err("Packet is erroneous!\n"); - cfpkt_destroy(pkt); - return -EPROTO; - } - return layr->up->receive(layr->up, pkt); -} - -static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt) -{ - struct cfsrvl *service = container_obj(layr); - struct caif_payload_info *info; - u32 videoheader = 0; - int ret; - - if (!cfsrvl_ready(service, &ret)) { - cfpkt_destroy(pkt); - return ret; - } - - cfpkt_add_head(pkt, &videoheader, 4); - /* Add info for MUX-layer 
to route the packet out */ - info = cfpkt_info(pkt); - info->channel_id = service->layer.id; - info->dev_info = &service->dev_info; - return layr->dn->transmit(layr->dn, pkt); -} diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c deleted file mode 100644 index fa6a3c2634a8..000000000000 --- a/net/caif/chnl_net.c +++ /dev/null @@ -1,531 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Authors: Sjur Brendeland - * Daniel Martensson - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/if_ether.h> -#include <linux/ip.h> -#include <linux/sched.h> -#include <linux/sockios.h> -#include <linux/caif/if_caif.h> -#include <net/rtnetlink.h> -#include <net/caif/caif_layer.h> -#include <net/caif/cfpkt.h> -#include <net/caif/caif_dev.h> - -/* GPRS PDP connection has MTU to 1500 */ -#define GPRS_PDP_MTU 1500 -/* 5 sec. connect timeout */ -#define CONNECT_TIMEOUT (5 * HZ) -#define CAIF_NET_DEFAULT_QUEUE_LEN 500 -#define UNDEF_CONNID 0xffffffff - -/*This list is protected by the rtnl lock. */ -static LIST_HEAD(chnl_net_list); - -MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("caif"); - -enum caif_states { - CAIF_CONNECTED = 1, - CAIF_CONNECTING, - CAIF_DISCONNECTED, - CAIF_SHUTDOWN -}; - -struct chnl_net { - struct cflayer chnl; - struct caif_connect_request conn_req; - struct list_head list_field; - struct net_device *netdev; - wait_queue_head_t netmgmt_wq; - /* Flow status to remember and control the transmission. 
*/ - bool flowenabled; - enum caif_states state; -}; - -static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) -{ - struct sk_buff *skb; - struct chnl_net *priv; - int pktlen; - const u8 *ip_version; - u8 buf; - - priv = container_of(layr, struct chnl_net, chnl); - - skb = (struct sk_buff *) cfpkt_tonative(pkt); - - /* Get length of CAIF packet. */ - pktlen = skb->len; - - /* Pass some minimum information and - * send the packet to the net stack. - */ - skb->dev = priv->netdev; - - /* check the version of IP */ - ip_version = skb_header_pointer(skb, 0, 1, &buf); - if (!ip_version) { - kfree_skb(skb); - return -EINVAL; - } - - switch (*ip_version >> 4) { - case 4: - skb->protocol = htons(ETH_P_IP); - break; - case 6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - kfree_skb(skb); - priv->netdev->stats.rx_errors++; - return -EINVAL; - } - - /* If we change the header in loop mode, the checksum is corrupted. */ - if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; - - netif_rx(skb); - - /* Update statistics. 
*/ - priv->netdev->stats.rx_packets++; - priv->netdev->stats.rx_bytes += pktlen; - - return 0; -} - -static int delete_device(struct chnl_net *dev) -{ - ASSERT_RTNL(); - if (dev->netdev) - unregister_netdevice(dev->netdev); - return 0; -} - -static void close_work(struct work_struct *work) -{ - struct chnl_net *dev = NULL; - struct list_head *list_node; - struct list_head *_tmp; - - rtnl_lock(); - list_for_each_safe(list_node, _tmp, &chnl_net_list) { - dev = list_entry(list_node, struct chnl_net, list_field); - if (dev->state == CAIF_SHUTDOWN) - dev_close(dev->netdev); - } - rtnl_unlock(); -} -static DECLARE_WORK(close_worker, close_work); - -static void chnl_hold(struct cflayer *lyr) -{ - struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl); - dev_hold(priv->netdev); -} - -static void chnl_put(struct cflayer *lyr) -{ - struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl); - dev_put(priv->netdev); -} - -static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, - int phyid) -{ - struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); - pr_debug("NET flowctrl func called flow: %s\n", - flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : - flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : - flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : - flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" : - flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" : - flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? 
- "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND"); - - - - switch (flow) { - case CAIF_CTRLCMD_FLOW_OFF_IND: - priv->flowenabled = false; - netif_stop_queue(priv->netdev); - break; - case CAIF_CTRLCMD_DEINIT_RSP: - priv->state = CAIF_DISCONNECTED; - break; - case CAIF_CTRLCMD_INIT_FAIL_RSP: - priv->state = CAIF_DISCONNECTED; - wake_up_interruptible(&priv->netmgmt_wq); - break; - case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: - priv->state = CAIF_SHUTDOWN; - netif_tx_disable(priv->netdev); - schedule_work(&close_worker); - break; - case CAIF_CTRLCMD_FLOW_ON_IND: - priv->flowenabled = true; - netif_wake_queue(priv->netdev); - break; - case CAIF_CTRLCMD_INIT_RSP: - caif_client_register_refcnt(&priv->chnl, chnl_hold, chnl_put); - priv->state = CAIF_CONNECTED; - priv->flowenabled = true; - netif_wake_queue(priv->netdev); - wake_up_interruptible(&priv->netmgmt_wq); - break; - default: - break; - } -} - -static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct chnl_net *priv; - struct cfpkt *pkt = NULL; - int len; - int result = -1; - /* Get our private data. */ - priv = netdev_priv(dev); - - if (skb->len > priv->netdev->mtu) { - pr_warn("Size of skb exceeded MTU\n"); - kfree_skb(skb); - dev->stats.tx_errors++; - return NETDEV_TX_OK; - } - - if (!priv->flowenabled) { - pr_debug("dropping packets flow off\n"); - kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - - if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) - swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); - - /* Store original SKB length. */ - len = skb->len; - - pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb); - - /* Send the packet down the stack. */ - result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); - if (result) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - - /* Update statistics. 
*/ - dev->stats.tx_packets++; - dev->stats.tx_bytes += len; - - return NETDEV_TX_OK; -} - -static int chnl_net_open(struct net_device *dev) -{ - struct chnl_net *priv = NULL; - int result = -1; - int llifindex, headroom, tailroom, mtu; - struct net_device *lldev; - ASSERT_RTNL(); - priv = netdev_priv(dev); - if (!priv) { - pr_debug("chnl_net_open: no priv\n"); - return -ENODEV; - } - - if (priv->state != CAIF_CONNECTING) { - priv->state = CAIF_CONNECTING; - result = caif_connect_client(dev_net(dev), &priv->conn_req, - &priv->chnl, &llifindex, - &headroom, &tailroom); - if (result != 0) { - pr_debug("err: " - "Unable to register and open device," - " Err:%d\n", - result); - goto error; - } - - lldev = __dev_get_by_index(dev_net(dev), llifindex); - - if (lldev == NULL) { - pr_debug("no interface?\n"); - result = -ENODEV; - goto error; - } - - dev->needed_tailroom = tailroom + lldev->needed_tailroom; - dev->hard_header_len = headroom + lldev->hard_header_len + - lldev->needed_tailroom; - - /* - * MTU, head-room etc is not know before we have a - * CAIF link layer device available. MTU calculation may - * override initial RTNL configuration. - * MTU is minimum of current mtu, link layer mtu pluss - * CAIF head and tail, and PDP GPRS contexts max MTU. 
- */ - mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); - mtu = min_t(int, GPRS_PDP_MTU, mtu); - dev_set_mtu(dev, mtu); - - if (mtu < 100) { - pr_warn("CAIF Interface MTU too small (%d)\n", mtu); - result = -ENODEV; - goto error; - } - } - - rtnl_unlock(); /* Release RTNL lock during connect wait */ - - result = wait_event_interruptible_timeout(priv->netmgmt_wq, - priv->state != CAIF_CONNECTING, - CONNECT_TIMEOUT); - - rtnl_lock(); - - if (result == -ERESTARTSYS) { - pr_debug("wait_event_interruptible woken by a signal\n"); - result = -ERESTARTSYS; - goto error; - } - - if (result == 0) { - pr_debug("connect timeout\n"); - result = -ETIMEDOUT; - goto error; - } - - if (priv->state != CAIF_CONNECTED) { - pr_debug("connect failed\n"); - result = -ECONNREFUSED; - goto error; - } - pr_debug("CAIF Netdevice connected\n"); - return 0; - -error: - caif_disconnect_client(dev_net(dev), &priv->chnl); - priv->state = CAIF_DISCONNECTED; - pr_debug("state disconnected\n"); - return result; - -} - -static int chnl_net_stop(struct net_device *dev) -{ - struct chnl_net *priv; - - ASSERT_RTNL(); - priv = netdev_priv(dev); - priv->state = CAIF_DISCONNECTED; - caif_disconnect_client(dev_net(dev), &priv->chnl); - return 0; -} - -static int chnl_net_init(struct net_device *dev) -{ - struct chnl_net *priv; - ASSERT_RTNL(); - priv = netdev_priv(dev); - INIT_LIST_HEAD(&priv->list_field); - return 0; -} - -static void chnl_net_uninit(struct net_device *dev) -{ - struct chnl_net *priv; - ASSERT_RTNL(); - priv = netdev_priv(dev); - list_del_init(&priv->list_field); -} - -static const struct net_device_ops netdev_ops = { - .ndo_open = chnl_net_open, - .ndo_stop = chnl_net_stop, - .ndo_init = chnl_net_init, - .ndo_uninit = chnl_net_uninit, - .ndo_start_xmit = chnl_net_start_xmit, -}; - -static void chnl_net_destructor(struct net_device *dev) -{ - struct chnl_net *priv = netdev_priv(dev); - caif_free_client(&priv->chnl); -} - -static void ipcaif_net_setup(struct net_device *dev) 
-{ - struct chnl_net *priv; - dev->netdev_ops = &netdev_ops; - dev->needs_free_netdev = true; - dev->priv_destructor = chnl_net_destructor; - dev->flags |= IFF_NOARP; - dev->flags |= IFF_POINTOPOINT; - dev->mtu = GPRS_PDP_MTU; - dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; - - priv = netdev_priv(dev); - priv->chnl.receive = chnl_recv_cb; - priv->chnl.ctrlcmd = chnl_flowctrl_cb; - priv->netdev = dev; - priv->conn_req.protocol = CAIFPROTO_DATAGRAM; - priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; - priv->conn_req.priority = CAIF_PRIO_LOW; - /* Insert illegal value */ - priv->conn_req.sockaddr.u.dgm.connection_id = UNDEF_CONNID; - priv->flowenabled = false; - - init_waitqueue_head(&priv->netmgmt_wq); -} - - -static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev) -{ - struct chnl_net *priv; - u8 loop; - priv = netdev_priv(dev); - if (nla_put_u32(skb, IFLA_CAIF_IPV4_CONNID, - priv->conn_req.sockaddr.u.dgm.connection_id) || - nla_put_u32(skb, IFLA_CAIF_IPV6_CONNID, - priv->conn_req.sockaddr.u.dgm.connection_id)) - goto nla_put_failure; - loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP; - if (nla_put_u8(skb, IFLA_CAIF_LOOPBACK, loop)) - goto nla_put_failure; - return 0; -nla_put_failure: - return -EMSGSIZE; - -} - -static void caif_netlink_parms(struct nlattr *data[], - struct caif_connect_request *conn_req) -{ - if (!data) { - pr_warn("no params data found\n"); - return; - } - if (data[IFLA_CAIF_IPV4_CONNID]) - conn_req->sockaddr.u.dgm.connection_id = - nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]); - if (data[IFLA_CAIF_IPV6_CONNID]) - conn_req->sockaddr.u.dgm.connection_id = - nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]); - if (data[IFLA_CAIF_LOOPBACK]) { - if (nla_get_u8(data[IFLA_CAIF_LOOPBACK])) - conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP; - else - conn_req->protocol = CAIFPROTO_DATAGRAM; - } -} - -static int ipcaif_newlink(struct net_device *dev, - struct rtnl_newlink_params *params, - struct netlink_ext_ack *extack) -{ - struct 
nlattr **data = params->data; - int ret; - struct chnl_net *caifdev; - ASSERT_RTNL(); - caifdev = netdev_priv(dev); - caif_netlink_parms(data, &caifdev->conn_req); - - ret = register_netdevice(dev); - if (ret) - pr_warn("device rtml registration failed\n"); - else - list_add(&caifdev->list_field, &chnl_net_list); - - /* Use ifindex as connection id, and use loopback channel default. */ - if (caifdev->conn_req.sockaddr.u.dgm.connection_id == UNDEF_CONNID) { - caifdev->conn_req.sockaddr.u.dgm.connection_id = dev->ifindex; - caifdev->conn_req.protocol = CAIFPROTO_DATAGRAM_LOOP; - } - return ret; -} - -static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - struct chnl_net *caifdev; - ASSERT_RTNL(); - caifdev = netdev_priv(dev); - caif_netlink_parms(data, &caifdev->conn_req); - netdev_state_change(dev); - return 0; -} - -static size_t ipcaif_get_size(const struct net_device *dev) -{ - return - /* IFLA_CAIF_IPV4_CONNID */ - nla_total_size(4) + - /* IFLA_CAIF_IPV6_CONNID */ - nla_total_size(4) + - /* IFLA_CAIF_LOOPBACK */ - nla_total_size(2) + - 0; -} - -static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = { - [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 }, - [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 }, - [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 } -}; - - -static struct rtnl_link_ops ipcaif_link_ops __read_mostly = { - .kind = "caif", - .priv_size = sizeof(struct chnl_net), - .setup = ipcaif_net_setup, - .maxtype = IFLA_CAIF_MAX, - .policy = ipcaif_policy, - .newlink = ipcaif_newlink, - .changelink = ipcaif_changelink, - .get_size = ipcaif_get_size, - .fill_info = ipcaif_fill_info, - -}; - -static int __init chnl_init_module(void) -{ - return rtnl_link_register(&ipcaif_link_ops); -} - -static void __exit chnl_exit_module(void) -{ - struct chnl_net *dev = NULL; - struct list_head *list_node; - struct list_head *_tmp; - rtnl_link_unregister(&ipcaif_link_ops); - rtnl_lock(); - 
list_for_each_safe(list_node, _tmp, &chnl_net_list) { - dev = list_entry(list_node, struct chnl_net, list_field); - list_del_init(list_node); - delete_device(dev); - } - rtnl_unlock(); -} - -module_init(chnl_init_module); -module_exit(chnl_exit_module); diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 901b93530b21..17660bde896b 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -245,7 +245,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, ac->protocol = 0; ac->ops = NULL; } - if (ac->protocol != protocol) { + if (!ac->protocol) { ret = init_protocol(ac, protocol); if (ret) { pr_err("auth protocol '%s' init failed: %d\n", @@ -257,7 +257,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, ac->negotiating = false; } - if (result) { + if (result < 0) { pr_err("auth protocol '%s' mauth authentication failed: %d\n", ceph_auth_proto_name(ac->protocol), result); ret = result; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 3a5bd1cd1e99..17b041779fb9 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -911,7 +911,7 @@ int crush_do_rule(const struct crush_map *map, int osize; const struct crush_rule *rule; __u32 step; - int i, j; + int i; int numrep; int out_size; /* @@ -1012,7 +1012,6 @@ int crush_do_rule(const struct crush_map *map, if (numrep <= 0) continue; } - j = 0; /* make sure bucket id is valid */ bno = -1 - w[i]; if (bno < 0 || bno >= map->max_buckets) { @@ -1036,7 +1035,7 @@ int crush_do_rule(const struct crush_map *map, weight, weight_max, x, numrep, curstep->arg2, - o+osize, j, + o+osize, 0, result_max-osize, choose_tries, recurse_tries, @@ -1058,7 +1057,7 @@ int crush_do_rule(const struct crush_map *map, weight, weight_max, x, out_size, numrep, curstep->arg2, - o+osize, j, + o+osize, 0, choose_tries, choose_leaf_tries ? 
choose_leaf_tries : 1, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 108adb583744..34b3097b4c7b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -368,8 +368,8 @@ static void ceph_sock_write_space(struct sock *sk) /* only queue to workqueue if there is data we want to write, * and there is sufficient space in the socket buffer to accept * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() - * doesn't get called again until try_write() fills the socket - * buffer. See net/ipv4/tcp_input.c:tcp_check_space() + * doesn't get called again until ceph_con_v[12]_try_write() fills + * the socket buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). */ if (ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING)) { diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 50f65820f623..05f6eea299fc 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -8,7 +8,6 @@ #include <linux/ceph/ceph_debug.h> #include <crypto/aead.h> -#include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/utils.h> #include <linux/bvec.h> @@ -2352,16 +2351,14 @@ bad: } /* - * Align session_key and con_secret to avoid GFP_ATOMIC allocation - * inside crypto_shash_setkey() and crypto_aead_setkey() called from - * setup_crypto(). __aligned(16) isn't guaranteed to work for stack - * objects, so do it by hand. + * Align con_secret to avoid GFP_ATOMIC allocation inside + * crypto_aead_setkey() called from setup_crypto(). __aligned(16) + * isn't guaranteed to work for stack objects, so do it by hand. 
*/ static int process_auth_done(struct ceph_connection *con, void *p, void *end) { - u8 session_key_buf[CEPH_MAX_KEY_LEN + 16]; + u8 session_key[CEPH_MAX_KEY_LEN]; u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16]; - u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16); u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); int session_key_len, con_secret_len; int payload_len; @@ -2415,7 +2412,7 @@ static int process_auth_done(struct ceph_connection *con, void *p, void *end) con->state = CEPH_CON_S_V2_AUTH_SIGNATURE; out: - memzero_explicit(session_key_buf, sizeof(session_key_buf)); + memzero_explicit(session_key, sizeof(session_key)); memzero_explicit(con_secret_buf, sizeof(con_secret_buf)); return ret; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d5080530ce0c..d2cdc8ee3155 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -174,6 +174,8 @@ int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr) */ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) { + BUG_ON(len > monc->m_auth->front_alloc_len); + monc->pending_auth = 1; monc->m_auth->front.iov_len = len; monc->m_auth->hdr.front_len = cpu_to_le32(len); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 14eb7812bda4..ecd659f79fd4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); - if (!(smap->map.map_flags & BPF_F_CLONE)) + if (!smap || !(smap->map.map_flags & BPF_F_CLONE)) continue; /* Note that for lockless listeners adding new element @@ -531,10 +531,10 @@ err_free: } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); -static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) +static int diag_get(struct bpf_local_storage_map *smap, + struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, 
*nla_value; - struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); @@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) if (!nla_stg) return -EMSGSIZE; - smap = rcu_dereference(sdata->smap); if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) goto errout; @@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) sdata->data, true); else copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + check_and_init_map_value(&smap->map, nla_data(nla_value)); nla_nest_end(skb, nla_stg); return 0; @@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, saved_len = skb->len; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { smap = rcu_dereference(SDATA(selem)->smap); + if (!smap) + continue; diag_size += nla_value_size(smap->map.value_size); - if (nla_stgs && diag_get(SDATA(selem), skb)) + if (nla_stgs && diag_get(smap, SDATA(selem), skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } @@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, diag_size += nla_value_size(diag->maps[i]->value_size); - if (nla_stgs && diag_get(sdata, skb)) + if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } diff --git a/net/core/dev.c b/net/core/dev.c index e59f6025067c..0c6c270d9f7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,7 +371,7 @@ static void netdev_name_node_alt_free(struct rcu_head *head) static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) { netdev_name_node_del(name_node); - list_del(&name_node->list); + list_del_rcu(&name_node->list); call_rcu(&name_node->rcu, netdev_name_node_alt_free); } @@ -5862,13 +5862,6 @@ static __latent_entropy void net_tx_action(void) 
xfrm_dev_backlog(sd); } -#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE) -/* This hook is defined here for ATM LANE */ -int (*br_fdb_test_addr_hook)(struct net_device *dev, - unsigned char *addr) __read_mostly; -EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); -#endif - /** * netdev_is_rx_handler_busy - check if receive handler is registered * @dev: device to check @@ -6869,9 +6862,9 @@ static void skb_defer_free_flush(void) #if defined(CONFIG_NET_RX_BUSY_POLL) -static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) +static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout) { - if (!skip_schedule) { + if (!timeout) { gro_normal_list(&napi->gro); __napi_schedule(napi); return; @@ -6881,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) gro_flush_normal(&napi->gro, HZ >= 1000); clear_bit(NAPI_STATE_SCHED, &napi->state); + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); } enum { @@ -6892,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; - bool skip_schedule = false; - unsigned long timeout; + unsigned long timeout = 0; int rc; /* Busy polling means there is a high chance device driver hard irq @@ -6913,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = napi_get_gro_flush_timeout(napi); - if (napi->defer_hard_irqs_count && timeout) { - hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - skip_schedule = true; + if (napi->defer_hard_irqs_count) { + /* A short enough gro flush timeout and long enough + * poll can result in timer firing too early. + * Timer will be armed later if necessary. 
+ */ + timeout = napi_get_gro_flush_timeout(napi); } } @@ -6931,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); if (rc == budget) - __busy_poll_stop(napi, skip_schedule); + __busy_poll_stop(napi, timeout); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } @@ -9593,14 +9589,14 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) ops->ndo_change_rx_flags(dev, flags); } -static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) +int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags; unsigned int promiscuity, flags; kuid_t uid; kgid_t gid; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); promiscuity = dev->promiscuity + inc; if (promiscuity == 0) { @@ -9636,16 +9632,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev_change_rx_flags(dev, IFF_PROMISC); } - if (notify) { - /* The ops lock is only required to ensure consistent locking - * for `NETDEV_CHANGE` notifiers. This function is sometimes - * called without the lock, even for devices that are ops - * locked, such as in `dev_uc_sync_multiple` when using - * bonding or teaming. - */ - netdev_ops_assert_locked(dev); + if (notify) __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL); - } return 0; } @@ -9667,7 +9655,7 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify) unsigned int old_flags = dev->flags, old_gflags = dev->gflags; unsigned int allmulti, flags; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); allmulti = dev->allmulti + inc; if (allmulti == 0) { @@ -9697,46 +9685,6 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify) return 0; } -/* - * Upload unicast and multicast address lists to device and - * configure RX filtering. 
When the device doesn't support unicast - * filtering it is put in promiscuous mode while unicast addresses - * are present. - */ -void __dev_set_rx_mode(struct net_device *dev) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - /* dev_open will call this function so the list will stay sane. */ - if (!(dev->flags&IFF_UP)) - return; - - if (!netif_device_present(dev)) - return; - - if (!(dev->priv_flags & IFF_UNICAST_FLT)) { - /* Unicast addresses changes may only happen under the rtnl, - * therefore calling __dev_set_promiscuity here is safe. - */ - if (!netdev_uc_empty(dev) && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1, false); - dev->uc_promisc = true; - } else if (netdev_uc_empty(dev) && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1, false); - dev->uc_promisc = false; - } - } - - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); -} - -void dev_set_rx_mode(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); -} /** * netif_get_flags() - get flags reported to userspace @@ -9775,7 +9723,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, unsigned int old_flags = dev->flags; int ret; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); /* * Set the flags on our device. 
@@ -11408,6 +11356,11 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } + if (netdev_need_ops_lock(dev) && + dev->netdev_ops->ndo_set_rx_mode && + !dev->netdev_ops->ndo_set_rx_mode_async) + netdev_WARN(dev, "ops-locked drivers should use ndo_set_rx_mode_async\n"); + ret = netdev_do_alloc_pcpu_stats(dev); if (ret) goto err_uninit; @@ -12127,6 +12080,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif mutex_init(&dev->lock); + INIT_LIST_HEAD(&dev->rx_mode_node); + __hw_addr_init(&dev->rx_mode_addr_cache); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -12231,6 +12186,8 @@ void free_netdev(struct net_device *dev) kfree(rcu_dereference_protected(dev->ingress_queue, 1)); + __hw_addr_flush(&dev->rx_mode_addr_cache); + /* Flush device addresses */ dev_addr_flush(dev); diff --git a/net/core/dev.h b/net/core/dev.h index 628bdaebf0ca..0cf24b8f5008 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -78,6 +78,7 @@ void linkwatch_run_queue(void); void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +void __hw_addr_flush(struct netdev_hw_addr_list *list); #if IS_ENABLED(CONFIG_NET_SHAPER) void net_shaper_flush_netdev(struct net_device *dev); @@ -164,6 +165,9 @@ int netif_change_carrier(struct net_device *dev, bool new_carrier); int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); +int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify); +bool netif_rx_mode_clean(struct net_device *dev); +void netif_rx_mode_sync(struct net_device *dev); void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, unsigned int gchanges, u32 portid, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 76c91f224886..d73fcb0c6785 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -11,9 +11,18 @@ #include 
<linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <kunit/visibility.h> #include "dev.h" +static void netdev_rx_mode_work(struct work_struct *work); + +static LIST_HEAD(rx_mode_list); +static DEFINE_SPINLOCK(rx_mode_lock); +static DECLARE_WORK(rx_mode_work, netdev_rx_mode_work); + /* * General list handling functions */ @@ -481,7 +490,7 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, } EXPORT_SYMBOL(__hw_addr_unsync_dev); -static void __hw_addr_flush(struct netdev_hw_addr_list *list) +void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; @@ -492,6 +501,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list) } list->count = 0; } +EXPORT_SYMBOL_IF_KUNIT(__hw_addr_flush); void __hw_addr_init(struct netdev_hw_addr_list *list) { @@ -501,6 +511,133 @@ void __hw_addr_init(struct netdev_hw_addr_list *list) } EXPORT_SYMBOL(__hw_addr_init); +static void __hw_addr_splice(struct netdev_hw_addr_list *dst, + struct netdev_hw_addr_list *src) +{ + src->tree = RB_ROOT; + list_splice_init(&src->list, &dst->list); + dst->count += src->count; + src->count = 0; +} + +/** + * __hw_addr_list_snapshot - create a snapshot copy of an address list + * @snap: destination snapshot list (needs to be __hw_addr_init-initialized) + * @list: source address list to snapshot + * @addr_len: length of addresses + * @cache: entry cache to reuse entries from; falls back to GFP_ATOMIC + * + * Creates a copy of @list reusing entries from @cache when available. + * Must be called under a spinlock. + * + * Return: 0 on success, -errno on failure. 
+ */ +int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap, + const struct netdev_hw_addr_list *list, + int addr_len, struct netdev_hw_addr_list *cache) +{ + struct netdev_hw_addr *ha, *entry; + + list_for_each_entry(ha, &list->list, list) { + if (cache->count) { + entry = list_first_entry(&cache->list, + struct netdev_hw_addr, list); + list_del(&entry->list); + cache->count--; + memcpy(entry->addr, ha->addr, addr_len); + entry->type = ha->type; + entry->global_use = false; + entry->synced = 0; + } else { + entry = __hw_addr_create(ha->addr, addr_len, ha->type, + false, false); + if (!entry) { + __hw_addr_flush(snap); + return -ENOMEM; + } + } + entry->sync_cnt = ha->sync_cnt; + entry->refcount = ha->refcount; + + list_add_tail(&entry->list, &snap->list); + __hw_addr_insert(snap, entry, addr_len); + snap->count++; + } + + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_snapshot); + +/** + * __hw_addr_list_reconcile - sync snapshot changes back and free snapshots + * @real_list: the real address list to update + * @work: the working snapshot (modified by driver via __hw_addr_sync_dev) + * @ref: the reference snapshot (untouched copy of original state) + * @addr_len: length of addresses + * @cache: entry cache to return snapshot entries to for reuse + * + * Walks the reference snapshot and compares each entry against the work + * snapshot to compute sync_cnt deltas. Applies those deltas to @real_list. + * Returns snapshot entries to @cache for reuse; frees both snapshots. + * Caller must hold netif_addr_lock_bh. 
+ */ +void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list, + struct netdev_hw_addr_list *work, + struct netdev_hw_addr_list *ref, int addr_len, + struct netdev_hw_addr_list *cache) +{ + struct netdev_hw_addr *ref_ha, *tmp, *work_ha, *real_ha; + int delta; + + list_for_each_entry_safe(ref_ha, tmp, &ref->list, list) { + work_ha = __hw_addr_lookup(work, ref_ha->addr, addr_len, + ref_ha->type); + if (work_ha) + delta = work_ha->sync_cnt - ref_ha->sync_cnt; + else + delta = -1; + + if (delta == 0) + continue; + + real_ha = __hw_addr_lookup(real_list, ref_ha->addr, addr_len, + ref_ha->type); + if (!real_ha) { + /* The real entry was concurrently removed. If the + * driver synced this addr to hardware (delta > 0), + * re-insert it as a stale entry so the next work + * run unsyncs it from hardware. + */ + if (delta > 0) { + rb_erase(&ref_ha->node, &ref->tree); + list_del(&ref_ha->list); + ref->count--; + ref_ha->sync_cnt = delta; + ref_ha->refcount = delta; + list_add_tail_rcu(&ref_ha->list, + &real_list->list); + __hw_addr_insert(real_list, ref_ha, + addr_len); + real_list->count++; + } + continue; + } + + real_ha->sync_cnt += delta; + real_ha->refcount += delta; + if (!real_ha->refcount) { + rb_erase(&real_ha->node, &real_list->tree); + list_del_rcu(&real_ha->list); + kfree_rcu(real_ha, rcu_head); + real_list->count--; + } + } + + __hw_addr_splice(cache, work); + __hw_addr_splice(cache, ref); +} +EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_reconcile); + /* * Device addresses handling functions */ @@ -1049,3 +1186,249 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); + +static int netif_addr_lists_snapshot(struct net_device *dev, + struct netdev_hw_addr_list *uc_snap, + struct netdev_hw_addr_list *mc_snap, + struct netdev_hw_addr_list *uc_ref, + struct netdev_hw_addr_list *mc_ref) +{ + int err; + + err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len, + &dev->rx_mode_addr_cache); + if (!err) + err = 
__hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len, + &dev->rx_mode_addr_cache); + if (!err) + err = __hw_addr_list_snapshot(mc_snap, &dev->mc, + dev->addr_len, + &dev->rx_mode_addr_cache); + if (!err) + err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len, + &dev->rx_mode_addr_cache); + + if (err) { + __hw_addr_flush(uc_snap); + __hw_addr_flush(uc_ref); + __hw_addr_flush(mc_snap); + } + + return err; +} + +static void netif_addr_lists_reconcile(struct net_device *dev, + struct netdev_hw_addr_list *uc_snap, + struct netdev_hw_addr_list *mc_snap, + struct netdev_hw_addr_list *uc_ref, + struct netdev_hw_addr_list *mc_ref) +{ + __hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len, + &dev->rx_mode_addr_cache); + __hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len, + &dev->rx_mode_addr_cache); +} + +/** + * netif_uc_promisc_update() - evaluate whether uc_promisc should be toggled. + * @dev: device + * + * Must be called under netif_addr_lock_bh. + * Return: +1 to enter promisc, -1 to leave, 0 for no change. 
+ */ +static int netif_uc_promisc_update(struct net_device *dev) +{ + if (dev->priv_flags & IFF_UNICAST_FLT) + return 0; + + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { + dev->uc_promisc = true; + return 1; + } + if (netdev_uc_empty(dev) && dev->uc_promisc) { + dev->uc_promisc = false; + return -1; + } + return 0; +} + +static void netif_rx_mode_run(struct net_device *dev) +{ + struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref; + const struct net_device_ops *ops = dev->netdev_ops; + int promisc_inc; + int err; + + might_sleep(); + netdev_ops_assert_locked(dev); + + __hw_addr_init(&uc_snap); + __hw_addr_init(&mc_snap); + __hw_addr_init(&uc_ref); + __hw_addr_init(&mc_ref); + + if (!(dev->flags & IFF_UP) || !netif_device_present(dev)) + return; + + if (ops->ndo_set_rx_mode_async) { + netif_addr_lock_bh(dev); + err = netif_addr_lists_snapshot(dev, &uc_snap, &mc_snap, + &uc_ref, &mc_ref); + if (err) { + netdev_WARN(dev, "failed to sync uc/mc addresses\n"); + netif_addr_unlock_bh(dev); + return; + } + + promisc_inc = netif_uc_promisc_update(dev); + netif_addr_unlock_bh(dev); + } else { + netif_addr_lock_bh(dev); + promisc_inc = netif_uc_promisc_update(dev); + netif_addr_unlock_bh(dev); + } + + if (promisc_inc) + __dev_set_promiscuity(dev, promisc_inc, false); + + if (ops->ndo_set_rx_mode_async) { + ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap); + + netif_addr_lock_bh(dev); + netif_addr_lists_reconcile(dev, &uc_snap, &mc_snap, + &uc_ref, &mc_ref); + netif_addr_unlock_bh(dev); + } else if (ops->ndo_set_rx_mode) { + netif_addr_lock_bh(dev); + ops->ndo_set_rx_mode(dev); + netif_addr_unlock_bh(dev); + } +} + +static void netdev_rx_mode_work(struct work_struct *work) +{ + struct net_device *dev; + + rtnl_lock(); + + while (true) { + spin_lock_bh(&rx_mode_lock); + if (list_empty(&rx_mode_list)) { + spin_unlock_bh(&rx_mode_lock); + break; + } + dev = list_first_entry(&rx_mode_list, struct net_device, + rx_mode_node); + list_del_init(&dev->rx_mode_node); + /* 
We must free netdev tracker under + * the spinlock protection. + */ + netdev_tracker_free(dev, &dev->rx_mode_tracker); + spin_unlock_bh(&rx_mode_lock); + + netdev_lock_ops(dev); + netif_rx_mode_run(dev); + netdev_unlock_ops(dev); + /* Use __dev_put() because netdev_tracker_free() was already + * called above. Must be after netdev_unlock_ops() to prevent + * netdev_run_todo() from freeing the device while still in use. + */ + __dev_put(dev); + } + + rtnl_unlock(); +} + +static void netif_rx_mode_queue(struct net_device *dev) +{ + spin_lock_bh(&rx_mode_lock); + if (list_empty(&dev->rx_mode_node)) { + list_add_tail(&dev->rx_mode_node, &rx_mode_list); + netdev_hold(dev, &dev->rx_mode_tracker, GFP_ATOMIC); + } + spin_unlock_bh(&rx_mode_lock); + schedule_work(&rx_mode_work); +} + +/** + * __dev_set_rx_mode() - upload unicast and multicast address lists to device + * and configure RX filtering. + * @dev: device + * + * When the device doesn't support unicast filtering it is put in promiscuous + * mode while unicast addresses are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int promisc_inc; + + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags & IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (ops->ndo_set_rx_mode_async || ops->ndo_change_rx_flags || + netdev_need_ops_lock(dev)) { + netif_rx_mode_queue(dev); + return; + } + + /* Legacy path for non-ops-locked HW devices. 
*/ + + promisc_inc = netif_uc_promisc_update(dev); + if (promisc_inc) + __dev_set_promiscuity(dev, promisc_inc, false); + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_addr_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); +} + +bool netif_rx_mode_clean(struct net_device *dev) +{ + bool clean = false; + + spin_lock_bh(&rx_mode_lock); + if (!list_empty(&dev->rx_mode_node)) { + list_del_init(&dev->rx_mode_node); + clean = true; + /* We must release netdev tracker under + * the spinlock protection. + */ + netdev_tracker_free(dev, &dev->rx_mode_tracker); + } + spin_unlock_bh(&rx_mode_lock); + + return clean; +} + +/** + * netif_rx_mode_sync() - sync rx mode inline + * @dev: network device + * + * Drivers implementing ndo_set_rx_mode_async() have their rx mode callback + * executed from a workqueue. This allows the callback to sleep, but means + * the hardware update is deferred and may not be visible to userspace + * by the time the initiating syscall returns. netif_rx_mode_sync() steals + * workqueue update and executes it inline. This preserves the atomicity of + * operations to the userspace. + */ +void netif_rx_mode_sync(struct net_device *dev) +{ + if (netif_rx_mode_clean(dev)) { + netif_rx_mode_run(dev); + /* Use __dev_put() because netdev_tracker_free() was already + * called inside netif_rx_mode_clean(). 
+ */ + __dev_put(dev); + } +} diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c index 8e1dba825e94..260e71a2399f 100644 --- a/net/core/dev_addr_lists_test.c +++ b/net/core/dev_addr_lists_test.c @@ -2,22 +2,31 @@ #include <kunit/test.h> #include <linux/etherdevice.h> +#include <linux/math64.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> static const struct net_device_ops dummy_netdev_ops = { }; +#define ADDR_A 1 +#define ADDR_B 2 +#define ADDR_C 3 + struct dev_addr_test_priv { u32 addr_seen; + u32 addr_synced; + u32 addr_unsynced; }; static int dev_addr_test_sync(struct net_device *netdev, const unsigned char *a) { struct dev_addr_test_priv *datp = netdev_priv(netdev); - if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) + if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) { datp->addr_seen |= 1 << a[0]; + datp->addr_synced |= 1 << a[0]; + } return 0; } @@ -26,11 +35,22 @@ static int dev_addr_test_unsync(struct net_device *netdev, { struct dev_addr_test_priv *datp = netdev_priv(netdev); - if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) + if (a[0] < 31 && !memchr_inv(a, a[0], ETH_ALEN)) { datp->addr_seen &= ~(1 << a[0]); + datp->addr_unsynced |= 1 << a[0]; + } return 0; } +static void dev_addr_test_reset(struct net_device *netdev) +{ + struct dev_addr_test_priv *datp = netdev_priv(netdev); + + datp->addr_seen = 0; + datp->addr_synced = 0; + datp->addr_unsynced = 0; +} + static int dev_addr_test_init(struct kunit *test) { struct dev_addr_test_priv *datp; @@ -225,6 +245,363 @@ static void dev_addr_test_add_excl(struct kunit *test) rtnl_unlock(); } +/* Snapshot test: basic sync with no concurrent modifications. + * Add one address, snapshot, driver syncs it, reconcile propagates + * sync_cnt delta back to real list. 
+ */ +static void dev_addr_test_snapshot_sync(struct kunit *test) +{ + struct netdev_hw_addr_list snap, ref, cache; + struct net_device *netdev = test->priv; + struct dev_addr_test_priv *datp; + struct netdev_hw_addr *ha; + u8 addr[ETH_ALEN]; + + datp = netdev_priv(netdev); + + rtnl_lock(); + + memset(addr, ADDR_A, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + + /* Snapshot: ADDR_A has sync_cnt=0, refcount=1 (new) */ + netif_addr_lock_bh(netdev); + __hw_addr_init(&snap); + __hw_addr_init(&ref); + __hw_addr_init(&cache); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, + &cache)); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, + &cache)); + netif_addr_unlock_bh(netdev); + + /* Driver syncs ADDR_A to hardware */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + + /* Reconcile: delta=+1 applied to real entry */ + netif_addr_lock_bh(netdev); + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, + &cache); + netif_addr_unlock_bh(netdev); + + /* Real entry should now reflect the sync: sync_cnt=1, refcount=2 */ + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 2, ha->refcount); + + /* Second work run: already synced, nothing to do */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); + + __hw_addr_flush(&cache); + rtnl_unlock(); +} + +/* Snapshot test: ADDR_A synced to hardware, then concurrently removed + * from 
the real list before reconcile runs. Reconcile re-inserts ADDR_A as + * a stale entry so the next work run unsyncs it from hardware. + */ +static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test) +{ + struct netdev_hw_addr_list snap, ref, cache; + struct net_device *netdev = test->priv; + struct dev_addr_test_priv *datp; + struct netdev_hw_addr *ha; + u8 addr[ETH_ALEN]; + + datp = netdev_priv(netdev); + + rtnl_lock(); + + memset(addr, ADDR_A, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + + /* Snapshot: ADDR_A is new (sync_cnt=0, refcount=1) */ + netif_addr_lock_bh(netdev); + __hw_addr_init(&snap); + __hw_addr_init(&ref); + __hw_addr_init(&cache); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, + &cache)); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, + &cache)); + netif_addr_unlock_bh(netdev); + + /* Driver syncs ADDR_A to hardware */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + + /* Concurrent removal: user deletes ADDR_A while driver was working */ + memset(addr, ADDR_A, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); + KUNIT_EXPECT_EQ(test, 0, netdev->uc.count); + + /* Reconcile: ADDR_A gone from real list but driver synced it, + * so it gets re-inserted as stale (sync_cnt=1, refcount=1). 
+ */ + netif_addr_lock_bh(netdev); + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, + &cache); + netif_addr_unlock_bh(netdev); + + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 1, ha->refcount); + + /* Second work run: stale entry gets unsynced from HW and removed */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced); + KUNIT_EXPECT_EQ(test, 0, netdev->uc.count); + + __hw_addr_flush(&cache); + rtnl_unlock(); +} + +/* Snapshot test: ADDR_A was stale (unsynced from hardware by driver), + * but concurrently re-added by the user. The re-add bumps refcount of + * the existing stale entry. Reconcile applies delta=-1, leaving ADDR_A + * as a fresh entry (sync_cnt=0, refcount=1) for the next work run. 
+ */ +static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test) +{ + struct netdev_hw_addr_list snap, ref, cache; + struct net_device *netdev = test->priv; + struct dev_addr_test_priv *datp; + struct netdev_hw_addr *ha; + u8 addr[ETH_ALEN]; + + datp = netdev_priv(netdev); + + rtnl_lock(); + + memset(addr, ADDR_A, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + + /* Sync ADDR_A to hardware: sync_cnt=1, refcount=2 */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + + /* User removes ADDR_A: refcount=1, sync_cnt=1 -> stale */ + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); + + /* Snapshot: ADDR_A is stale (sync_cnt=1, refcount=1) */ + netif_addr_lock_bh(netdev); + __hw_addr_init(&snap); + __hw_addr_init(&ref); + __hw_addr_init(&cache); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, + &cache)); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, + &cache)); + netif_addr_unlock_bh(netdev); + + /* Driver unsyncs stale ADDR_A from hardware */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced); + + /* Concurrent: user re-adds ADDR_A. dev_uc_add finds the existing + * stale entry and bumps refcount from 1 -> 2. sync_cnt stays 1. + */ + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); + + /* Reconcile: ref sync_cnt=1 matches real sync_cnt=1, delta=-1 + * applied. Result: sync_cnt=0, refcount=1 (fresh). 
+ */ + netif_addr_lock_bh(netdev); + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, + &cache); + netif_addr_unlock_bh(netdev); + + /* Entry survives as fresh: needs re-sync to HW */ + KUNIT_EXPECT_EQ(test, 1, netdev->uc.count); + ha = list_first_entry(&netdev->uc.list, struct netdev_hw_addr, list); + KUNIT_EXPECT_MEMEQ(test, ha->addr, addr, ETH_ALEN); + KUNIT_EXPECT_EQ(test, 0, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 1, ha->refcount); + + /* Second work run: fresh entry gets synced to HW */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + + __hw_addr_flush(&cache); + rtnl_unlock(); +} + +/* Snapshot test: ADDR_A is new (synced by driver), and independent ADDR_B + * is concurrently removed from the real list. A's sync delta propagates + * normally; B's absence doesn't interfere. + */ +static void dev_addr_test_snapshot_add_and_remove(struct kunit *test) +{ + struct netdev_hw_addr_list snap, ref, cache; + struct net_device *netdev = test->priv; + struct dev_addr_test_priv *datp; + struct netdev_hw_addr *ha; + u8 addr[ETH_ALEN]; + + datp = netdev_priv(netdev); + + rtnl_lock(); + + /* Add ADDR_A and ADDR_B (will be synced then removed) */ + memset(addr, ADDR_A, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + memset(addr, ADDR_B, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + + /* Sync both to hardware: sync_cnt=1, refcount=2 */ + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + + /* Add ADDR_C (new, will be synced by snapshot) */ + memset(addr, ADDR_C, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + + /* Snapshot: A,B synced (sync_cnt=1,refcount=2); C new (0,1) */ + netif_addr_lock_bh(netdev); + __hw_addr_init(&snap); + __hw_addr_init(&ref); + __hw_addr_init(&cache); + 
KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN, + &cache)); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN, + &cache)); + netif_addr_unlock_bh(netdev); + + /* Driver syncs snapshot: ADDR_C is new -> synced; A,B already synced */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&snap, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 1 << ADDR_C, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced); + + /* Concurrent: user removes addr B while driver was working */ + memset(addr, ADDR_B, sizeof(addr)); + KUNIT_EXPECT_EQ(test, 0, dev_uc_del(netdev, addr)); + + /* Reconcile: ADDR_C's delta=+1 applied to real list. + * ADDR_B's delta=0 (unchanged in snapshot), + * so nothing to apply to ADDR_B. + */ + netif_addr_lock_bh(netdev); + __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN, + &cache); + netif_addr_unlock_bh(netdev); + + /* ADDR_A: unchanged (sync_cnt=1, refcount=2) + * ADDR_B: refcount went from 2->1 via dev_uc_del (still present, stale) + * ADDR_C: sync propagated (sync_cnt=1, refcount=2) + */ + KUNIT_EXPECT_EQ(test, 3, netdev->uc.count); + netdev_hw_addr_list_for_each(ha, &netdev->uc) { + u8 id = ha->addr[0]; + + if (!memchr_inv(ha->addr, id, ETH_ALEN)) { + if (id == ADDR_A) { + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 2, ha->refcount); + } else if (id == ADDR_B) { + /* B: still present but now stale */ + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 1, ha->refcount); + } else if (id == ADDR_C) { + KUNIT_EXPECT_EQ(test, 1, ha->sync_cnt); + KUNIT_EXPECT_EQ(test, 2, ha->refcount); + } + } + } + + /* Second work run: ADDR_B is stale, gets unsynced and removed */ + dev_addr_test_reset(netdev); + __hw_addr_sync_dev(&netdev->uc, netdev, dev_addr_test_sync, + dev_addr_test_unsync); + KUNIT_EXPECT_EQ(test, 0, datp->addr_synced); + KUNIT_EXPECT_EQ(test, 1 << ADDR_B, datp->addr_unsynced); + 
KUNIT_EXPECT_EQ(test, 2, netdev->uc.count); + + __hw_addr_flush(&cache); + rtnl_unlock(); +} + +static void dev_addr_test_snapshot_benchmark(struct kunit *test) +{ + struct net_device *netdev = test->priv; + struct netdev_hw_addr_list snap, cache; + u8 addr[ETH_ALEN]; + s64 duration = 0; + ktime_t start; + int i, iter; + + rtnl_lock(); + + for (i = 0; i < 1024; i++) { + memset(addr, 0, sizeof(addr)); + addr[0] = (i >> 8) & 0xff; + addr[1] = i & 0xff; + KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr)); + } + + __hw_addr_init(&cache); + + for (iter = 0; iter < 1000; iter++) { + netif_addr_lock_bh(netdev); + __hw_addr_init(&snap); + + start = ktime_get(); + KUNIT_EXPECT_EQ(test, 0, + __hw_addr_list_snapshot(&snap, &netdev->uc, + ETH_ALEN, &cache)); + duration += ktime_to_ns(ktime_sub(ktime_get(), start)); + + netif_addr_unlock_bh(netdev); + __hw_addr_flush(&snap); + } + + __hw_addr_flush(&cache); + + kunit_info(test, + "1024 addrs x 1000 snapshots: %lld ns total, %lld ns/iter", + duration, div_s64(duration, 1000)); + + rtnl_unlock(); +} + static struct kunit_case dev_addr_test_cases[] = { KUNIT_CASE(dev_addr_test_basic), KUNIT_CASE(dev_addr_test_sync_one), @@ -232,6 +609,11 @@ static struct kunit_case dev_addr_test_cases[] = { KUNIT_CASE(dev_addr_test_del_main), KUNIT_CASE(dev_addr_test_add_set), KUNIT_CASE(dev_addr_test_add_excl), + KUNIT_CASE(dev_addr_test_snapshot_sync), + KUNIT_CASE(dev_addr_test_snapshot_remove_during_sync), + KUNIT_CASE(dev_addr_test_snapshot_readd_during_unsync), + KUNIT_CASE(dev_addr_test_snapshot_add_and_remove), + KUNIT_CASE_SLOW(dev_addr_test_snapshot_benchmark), {} }; @@ -243,5 +625,6 @@ static struct kunit_suite dev_addr_test_suite = { }; kunit_test_suite(dev_addr_test_suite); +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list"); MODULE_LICENSE("GPL"); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index f28852078aa6..437947dd08ed 100644 --- a/net/core/dev_api.c +++ 
b/net/core/dev_api.c @@ -66,6 +66,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, netdev_lock_ops(dev); ret = netif_change_flags(dev, flags, extack); + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return ret; @@ -285,6 +286,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) netdev_lock_ops(dev); ret = netif_set_promiscuity(dev, inc); + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return ret; @@ -311,6 +313,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) netdev_lock_ops(dev); ret = netif_set_allmulti(dev, inc, true); + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return ret; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7a8966544c9d..f3979b276090 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -586,24 +586,26 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return err; case SIOCADDMULTI: - if (!ops->ndo_set_rx_mode || + if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; netdev_lock_ops(dev); err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return err; case SIOCDELMULTI: - if (!ops->ndo_set_rx_mode || + if ((!ops->ndo_set_rx_mode && !ops->ndo_set_rx_mode_async) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; netdev_lock_ops(dev); err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return err; diff --git a/net/core/devmem.c b/net/core/devmem.c index cde4c89bc146..468344739db2 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -297,8 +297,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, for (i = 0; i < owner->area.num_niovs; i++) { niov = &owner->area.niovs[i]; - niov->type = NET_IOV_DMABUF; - niov->owner = &owner->area; + net_iov_init(niov, &owner->area, 
NET_IOV_DMABUF); page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); if (direction == DMA_TO_DEVICE) diff --git a/net/core/failover.c b/net/core/failover.c index 11bb183c7a1b..e43c59cd6868 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -12,6 +12,7 @@ #include <uapi/linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> +#include <net/netdev_lock.h> #include <net/failover.h> static LIST_HEAD(failover_list); @@ -221,8 +222,11 @@ failover_existing_slave_register(struct net_device *failover_dev) for_each_netdev(net, dev) { if (netif_is_failover(dev)) continue; - if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) + if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) { + netdev_lock_ops(dev); failover_slave_register(dev); + netdev_unlock_ops(dev); + } } rtnl_unlock(); } diff --git a/net/core/filter.c b/net/core/filter.c index fcfcb72663ca..9590877b0714 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -503,7 +503,7 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) ((unaligned_ok && offset >= 0) || (!unaligned_ok && offset >= 0 && offset + ip_align >= 0 && - offset + ip_align % size == 0))) { + (offset + ip_align) % size == 0))) { bool ldx_off_ok = offset <= S16_MAX; *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); @@ -1654,15 +1654,24 @@ err_prog_put: return err; } +static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + struct bpf_prog *prog = aux->prog; + + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + void sk_reuseport_prog_free(struct bpf_prog *prog) { if (!prog) return; - if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) - bpf_prog_put(prog); + if (bpf_prog_was_classic(prog)) + call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu); else - bpf_prog_destroy(prog); + bpf_prog_put(prog); } static inline int __bpf_try_make_writable(struct sk_buff 
*skb, @@ -5396,7 +5405,7 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, if (val <= 0) return -EINVAL; tp->snd_cwnd_clamp = val; - tp->snd_ssthresh = val; + WRITE_ONCE(tp->snd_ssthresh, val); break; case TCP_BPF_DELACK_MAX: timeout = usecs_to_jiffies(val); @@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { - if (sk->sk_protocol != IPPROTO_TCP) + if (!sk_is_tcp(sk)) return -EINVAL; switch (optname) { @@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + /* + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters + * CA_EVENT_TX_START in bpf_tcp_cc. + */ + if (level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { + .func = bpf_sk_setsockopt_nodelay, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, if (!is_locked_tcp_sock_ops(bpf_sock)) return -EOPNOTSUPP; + /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. 
*/ + if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB || + bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) && + level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } @@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, * against MTU of FIB lookup resulting net_device */ dev = dev_get_by_index_rcu(net, params->ifindex); + if (unlikely(!dev)) + return -ENODEV; if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; @@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; @@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) */ BTF_TYPE_EMIT(struct tcp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_family == AF_INET6) + sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; @@ -11931,7 +11972,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1b61bb25ba0e..2a98f5fa74eb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1374,16 +1374,13 @@ proto_again: break; } - /* least significant bit of the most significant octet - * indicates if protocol field was compressed + /* PFC (compressed 1-byte protocol) frames are not processed. 
+ * A compressed protocol field has the least significant bit of + * the most significant octet set, which will fail the following + * ppp_proto_is_valid(), returning FLOW_DISSECT_RET_OUT_BAD. */ ppp_proto = ntohs(hdr->proto); - if (ppp_proto & 0x0100) { - ppp_proto = ppp_proto >> 8; - nhoff += PPPOE_SES_HLEN - 1; - } else { - nhoff += PPPOE_SES_HLEN; - } + nhoff += PPPOE_SES_HLEN; if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9e12524b67fa..5d9216016507 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3210,8 +3210,10 @@ int neigh_xmit(int index, struct net_device *dev, rcu_read_lock(); tbl = rcu_dereference(neigh_tables[index]); - if (!tbl) - goto out_unlock; + if (!tbl) { + rcu_read_unlock(); + goto out_kfree_skb; + } if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3227,7 +3229,6 @@ int neigh_xmit(int index, struct net_device *dev, goto out_kfree_skb; } err = READ_ONCE(neigh->output)(neigh, skb); -out_unlock: rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { @@ -3237,11 +3238,10 @@ out_unlock: goto out_kfree_skb; err = dev_queue_xmit(skb); } -out: return err; out_kfree_skb: kfree_skb(skb); - goto out; + return err; } EXPORT_SYMBOL(neigh_xmit); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cd74beffd209..84faace50ac2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf - * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. + * is used to format np->dev_mac when np->dev_name is empty; bufsz must + * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address + * and its NUL terminator. 
*/ -static char *egress_dev(struct netpoll *np, char *buf) +static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz) { if (np->dev_name[0]) return np->dev_name; - snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + snprintf(buf, bufsz, "%pM", np->dev_mac); return buf; } @@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EINVAL; } @@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return err; } @@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } @@ -704,6 +706,23 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) return 0; } +/* + * Test whether the caller left np->local_ip unset, so that + * netpoll_setup() should auto-populate it from the egress device. + * + * np->local_ip is a union of __be32 (IPv4) and struct in6_addr (IPv6), + * so an IPv6 address whose first 4 bytes are zero (e.g. ::1, ::2, + * IPv4-mapped ::ffff:a.b.c.d) must not be tested via the IPv4 arm — + * doing so would misclassify a caller-supplied address as unset and + * silently overwrite it with whatever address the device exposes. 
+ */ +static bool netpoll_local_ip_unset(const struct netpoll *np) +{ + if (np->ipv6) + return ipv6_addr_any(&np->local_ip.in6); + return !np->local_ip.ip; +} + int netpoll_setup(struct netpoll *np) { struct net *net = current->nsproxy->net_ns; @@ -719,7 +738,8 @@ int netpoll_setup(struct netpoll *np) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); + np_err(np, "%s doesn't exist, aborting\n", + egress_dev(np, buf, sizeof(buf))); err = -ENODEV; goto unlock; } @@ -727,14 +747,14 @@ int netpoll_setup(struct netpoll *np) if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = dev_open(ndev, NULL); if (err) { @@ -747,7 +767,7 @@ int netpoll_setup(struct netpoll *np) rtnl_lock(); } - if (!np->local_ip.ip) { + if (netpoll_local_ip_unset(np)) { if (!np->ipv6) { err = netpoll_take_ipv4(np, ndev); if (err) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 877bbf7a1938..6e576dec80db 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -327,6 +327,11 @@ static void page_pool_uninit(struct page_pool *pool) if (!pool->system) free_percpu(pool->recycle_stats); #endif + + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } } /** @@ -1146,11 +1151,6 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_ops) { - pool->mp_ops->destroy(pool); - static_branch_dec(&page_pool_mem_providers); - } - kfree(pool); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 69daba3ddaf0..df042da422ef 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1572,6 +1572,7 @@ static 
noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memset(&vf_broadcast, 0, sizeof(vf_broadcast)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; @@ -3431,6 +3432,7 @@ errout: dev->name); } + netif_rx_mode_sync(dev); netdev_unlock_ops(dev); return err; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index b0e96337a269..99e3789492a0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -530,7 +530,7 @@ static bool sock_map_redirect_allowed(const struct sock *sk) if (sk_is_tcp(sk)) return sk->sk_state != TCP_LISTEN; else - return sk->sk_state == TCP_ESTABLISHED; + return READ_ONCE(sk->sk_state) == TCP_ESTABLISHED; } static bool sock_map_sk_is_suitable(const struct sock *sk) @@ -543,7 +543,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); if (sk_is_stream_unix(sk)) - return (1 << sk->sk_state) & TCPF_ESTABLISHED; + return (1 << READ_ONCE(sk->sk_state)) & TCPF_ESTABLISHED; if (sk_is_vsock(sk) && (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) return (1 << sk->sk_state) & TCPF_ESTABLISHED; @@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk) void (*saved_unhash)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_unhash = READ_ONCE(sk->sk_prot)->unhash; + if (unlikely(saved_unhash == sock_map_unhash)) + goto retry; } else { saved_unhash = psock->saved_unhash; sock_map_remove_links(sk, psock); rcu_read_unlock(); + + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) + return; } - if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) - return; + if (saved_unhash) saved_unhash(sk); } @@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk) void (*saved_destroy)(struct sock *sk); struct sk_psock *psock; 
+retry: rcu_read_lock(); psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_destroy = READ_ONCE(sk->sk_prot)->destroy; + if (unlikely(saved_destroy == sock_map_destroy)) + goto retry; } else { saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); sk_psock_stop(psock); sk_psock_put(sk, psock); + + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) + return; } - if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) - return; + if (saved_destroy) saved_destroy(sk); } @@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout) void (*saved_close)(struct sock *sk, long timeout); struct sk_psock *psock; +retry: lock_sock(sk); rcu_read_lock(); - psock = sk_psock(sk); + psock = sk_psock_get(sk); if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); - psock = sk_psock_get(sk); - if (unlikely(!psock)) - goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + + /* Make sure we do not recurse. This is a bug. + * Leak the socket instead of crashing on a stack overflow. + */ + if (WARN_ON_ONCE(saved_close == sock_map_close)) + return; } else { saved_close = READ_ONCE(sk->sk_prot)->close; -no_psock: rcu_read_unlock(); release_sock(sk); + + if (unlikely(saved_close == sock_map_close)) + goto retry; } - /* Make sure we do not recurse. This is a bug. - * Leak the socket instead of crashing on a stack overflow. 
- */ - if (WARN_ON_ONCE(saved_close == sock_map_close)) - return; saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close); diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c index a1b044467bd6..8398d72d7e4d 100644 --- a/net/dsa/conduit.c +++ b/net/dsa/conduit.c @@ -27,9 +27,7 @@ static int dsa_conduit_get_regs_len(struct net_device *dev) int len; if (ops && ops->get_regs_len) { - netdev_lock_ops(dev); len = ops->get_regs_len(dev); - netdev_unlock_ops(dev); if (len < 0) return len; ret += len; @@ -60,15 +58,11 @@ static void dsa_conduit_get_regs(struct net_device *dev, int len; if (ops && ops->get_regs_len && ops->get_regs) { - netdev_lock_ops(dev); len = ops->get_regs_len(dev); - if (len < 0) { - netdev_unlock_ops(dev); + if (len < 0) return; - } regs->len = len; ops->get_regs(dev, regs, data); - netdev_unlock_ops(dev); data += regs->len; } @@ -115,10 +109,8 @@ static void dsa_conduit_get_ethtool_stats(struct net_device *dev, int count, mcount = 0; if (ops && ops->get_sset_count && ops->get_ethtool_stats) { - netdev_lock_ops(dev); mcount = ops->get_sset_count(dev, ETH_SS_STATS); ops->get_ethtool_stats(dev, stats, data); - netdev_unlock_ops(dev); } list_for_each_entry(dp, &dst->ports, list) { @@ -149,10 +141,8 @@ static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, if (count >= 0) phy_ethtool_get_stats(dev->phydev, stats, data); } else if (ops && ops->get_sset_count && ops->get_ethtool_phy_stats) { - netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); ops->get_ethtool_phy_stats(dev, stats, data); - netdev_unlock_ops(dev); } if (count < 0) @@ -176,13 +166,11 @@ static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) struct dsa_switch_tree *dst = cpu_dp->dst; int count = 0; - netdev_lock_ops(dev); if (sset == ETH_SS_PHY_STATS && dev->phydev && (!ops || !ops->get_ethtool_phy_stats)) count = phy_ethtool_get_sset_count(dev->phydev); else if (ops && ops->get_sset_count) count = ops->get_sset_count(dev, sset); - 
netdev_unlock_ops(dev); if (count < 0) count = 0; @@ -239,7 +227,6 @@ static void dsa_conduit_get_strings(struct net_device *dev, u32 stringset, struct dsa_switch_tree *dst = cpu_dp->dst; int count, mcount = 0; - netdev_lock_ops(dev); if (stringset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) { mcount = phy_ethtool_get_sset_count(dev->phydev); @@ -253,7 +240,6 @@ static void dsa_conduit_get_strings(struct net_device *dev, u32 stringset, mcount = 0; ops->get_strings(dev, stringset, data); } - netdev_unlock_ops(dev); list_for_each_entry(dp, &dst->ports, list) { if (!dsa_port_is_dsa(dp) && !dsa_port_is_cpu(dp)) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 8bb98d3ea3db..a3a2cc6480a0 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -92,7 +92,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, u32 mask; if (end <= start) - return true; + return false; if (start % 32) { mask = ethnl_upper_bits(start); @@ -105,11 +105,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, start_word++; } - if (!memchr_inv(map + start_word, '\0', - (end_word - start_word) * sizeof(u32))) + if (memchr_inv(map + start_word, '\0', + (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) - return true; + return false; return map[end_word] & ethnl_lower_bits(end); } diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index d4e6887055ab..ddc6eab701ed 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, struct nlattr **tb = info->attrs; struct phy_device_node *pdn; struct phy_device *phydev; + int ret; /* RTNL is held by the caller */ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER, @@ -88,8 +89,19 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, return -EOPNOTSUPP; rep_data->phyindex = phydev->phyindex; + rep_data->name = 
kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL); - rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->name) + return -ENOMEM; + + if (phydev->drv) { + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->drvname) { + ret = -ENOMEM; + goto err_free_name; + } + } + rep_data->upstream_type = pdn->upstream_type; if (pdn->upstream_type == PHY_UPSTREAM_PHY) { @@ -97,15 +109,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, rep_data->upstream_index = upstream->phyindex; } - if (pdn->parent_sfp_bus) + if (pdn->parent_sfp_bus) { rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus), GFP_KERNEL); + if (!rep_data->upstream_sfp_name) { + ret = -ENOMEM; + goto err_free_drvname; + } + } - if (phydev->sfp_bus) + if (phydev->sfp_bus) { rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus), GFP_KERNEL); + if (!rep_data->downstream_sfp_name) { + ret = -ENOMEM; + goto err_free_upstream_sfp; + } + } return 0; + +err_free_upstream_sfp: + kfree(rep_data->upstream_sfp_name); +err_free_drvname: + kfree(rep_data->drvname); +err_free_name: + kfree(rep_data->name); + return ret; } static int phy_fill_reply(struct sk_buff *skb, diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index d09875b33588..124619920d38 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -889,7 +889,10 @@ int hsr_get_node_data(struct hsr_priv *hsr, if (node->addr_B_port != HSR_PT_NONE) { port = hsr_port_get_hsr(hsr, node->addr_B_port); - *addr_b_ifindex = port->dev->ifindex; + if (port) + *addr_b_ifindex = port->dev->ifindex; + else + *addr_b_ifindex = -1; } else { *addr_b_ifindex = -1; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5fb812443a08..4366cbac3f06 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err) struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); + 
int seqhi_len = 0; + __be32 *seqhi; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51d70180e1cc..d409f606aec0 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -109,7 +109,6 @@ #include <net/sock.h> #include <net/arp.h> #include <net/ax25.h> -#include <net/netrom.h> #include <net/dst_metadata.h> #include <net/ip_tunnels.h> diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 008edc7f6688..791e15063237 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, */ if (prog_ops_moff(prog) != offsetof(struct tcp_congestion_ops, release)) - return &bpf_sk_setsockopt_proto; + return &bpf_sk_setsockopt_nodelay_proto; return NULL; case BPF_FUNC_getsockopt: /* Since get/setsockopt is usually expected to diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bcdef65..6a5febbdbee4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + 
!skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2f4fac22d1ab..7eeff658b467 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -64,6 +64,7 @@ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/fcntl.h> +#include <linux/nospec.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> @@ -371,7 +372,9 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, to, len); skb->csum = csum_block_add(skb->csum, csum, odd); - if (icmp_pointers[icmp_param->data.icmph.type].error) + if (icmp_param->data.icmph.type <= NR_ICMP_TYPES && + icmp_pointers[array_index_nospec(icmp_param->data.icmph.type, + NR_ICMP_TYPES + 1)].error) nf_ct_attach(skb, icmp_param->skb); return 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a674fb44ec25..a9ad39064f3b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -122,16 +122,29 @@ * contradict to specs provided this delay is small enough. 
*/ -#define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +static bool IGMP_V1_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1) + return true; + seen = READ_ONCE(in_dev->mr_v1_seen); + return seen && time_before(jiffies, seen); +} + +static bool IGMP_V2_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2) + return true; + seen = READ_ONCE(in_dev->mr_v2_seen); + return seen && time_before(jiffies, seen); +} static int unsolicited_report_interval(struct in_device *in_dev) { @@ -954,23 +967,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int max_delay; int mark = 0; struct net *net = dev_net(in_dev->dev); - + unsigned long seen; if (len == 8) { + seen = jiffies + READ_ONCE(in_dev->mr_qrv) * READ_ONCE(in_dev->mr_qi) + + READ_ONCE(in_dev->mr_qri); if (ih->code == 0) { /* Alas, old v1 router presents here. 
*/ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_v1_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v1_seen, seen); group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); - in_dev->mr_v2_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v2_seen, seen); } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); @@ -995,6 +1006,8 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ + unsigned long mr_qi; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return true; @@ -1015,15 +1028,16 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); - in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; - + WRITE_ONCE(in_dev->mr_qrv, + ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + WRITE_ONCE(in_dev->mr_qi, mr_qi); /* RFC3376, 8.3. Query Response Interval: * The number of seconds represented by the [Query Response * Interval] must be less than the [Query Interval]. 
*/ - if (in_dev->mr_qri >= in_dev->mr_qi) - in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; + if (READ_ONCE(in_dev->mr_qri) >= mr_qi) + WRITE_ONCE(in_dev->mr_qri, (mr_qi/HZ - 1) * HZ); if (!group) { /* general query */ if (ih3->nsrcs) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4ac3ae1bc1af..dbcd37dfdc15 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1108,7 +1108,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, false); goto no_ownership; } @@ -1134,7 +1134,7 @@ no_ownership: } drop: - __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true); reqsk_put(oreq); } @@ -1479,16 +1479,19 @@ void inet_csk_listen_stop(struct sock *sk) if (nreq) { refcount_set(&nreq->rsk_refcnt, 1); + rcu_read_lock(); if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { __NET_INC_STATS(sock_net(nsk), LINUX_MIB_TCPMIGRATEREQSUCCESS); reqsk_migrate_reset(req); + READ_ONCE(nsk->sk_data_ready)(nsk); } else { __NET_INC_STATS(sock_net(nsk), LINUX_MIB_TCPMIGRATEREQFAILURE); reqsk_migrate_reset(nreq); __reqsk_free(nreq); } + rcu_read_unlock(); /* inet_csk_reqsk_queue_add() has already * called inet_child_forget() on failure case. diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d8083b9033c2..5b957a831e7c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -179,7 +179,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - if (p) + /* Make sure tree was not modified during our lookup. */ + if (p && !read_seqretry(&base->lock, seq)) return p; /* retry an exact lookup, taking the lock before. 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e4790cc7b5c2..5bcd73cbdb41 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,6 +1233,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8a08d09b4c30..2628cd3a93a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -151,16 +151,6 @@ static struct mr_table *__ipmr_get_table(struct net *net, u32 id) return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) -{ - struct mr_table *mrt; - - rcu_read_lock(); - mrt = __ipmr_get_table(net, id); - rcu_read_unlock(); - return mrt; -} - static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { @@ -293,7 +283,7 @@ static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct mr_table *mrt, *next; list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { - list_del(&mrt->list); + list_del_rcu(&mrt->list); ipmr_free_table(mrt, dev_kill_list); } } @@ -315,28 +305,30 @@ bool ipmr_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL(ipmr_rule_default); #else -#define ipmr_for_each_table(mrt, net) \ - for (mrt = net->ipv4.mrt; mrt; mrt = NULL) - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { if (!mrt) - return net->ipv4.mrt; + return rcu_dereference(net->ipv4.mrt); return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) +static struct mr_table *__ipmr_get_table(struct net *net, u32 id) { - return net->ipv4.mrt; + return rcu_dereference_check(net->ipv4.mrt, + lockdep_rtnl_is_held() || + !rcu_access_pointer(net->ipv4.mrt)); } -#define __ipmr_get_table ipmr_get_table +#define ipmr_for_each_table(mrt, net) \ + for (mrt = __ipmr_get_table(net, 0); mrt; mrt = NULL) static int ipmr_fib_lookup(struct net 
*net, struct flowi4 *flp4, struct mr_table **mrt) { - *mrt = net->ipv4.mrt; + *mrt = rcu_dereference(net->ipv4.mrt); + if (!*mrt) + return -EAGAIN; return 0; } @@ -347,7 +339,8 @@ static int __net_init ipmr_rules_init(struct net *net) mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); if (IS_ERR(mrt)) return PTR_ERR(mrt); - net->ipv4.mrt = mrt; + + rcu_assign_pointer(net->ipv4.mrt, mrt); return 0; } @@ -358,9 +351,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { - ipmr_free_table(net->ipv4.mrt, dev_kill_list); + struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); - net->ipv4.mrt = NULL; + RCU_INIT_POINTER(net->ipv4.mrt, NULL); + ipmr_free_table(mrt, dev_kill_list); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -381,6 +375,17 @@ bool ipmr_rule_default(const struct fib_rule *rule) EXPORT_SYMBOL(ipmr_rule_default); #endif +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + rcu_read_lock(); + mrt = __ipmr_get_table(net, id); + rcu_read_unlock(); + + return mrt; +} + static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { @@ -441,12 +446,11 @@ static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_lis WARN_ON_ONCE(!mr_can_free_table(net)); - timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, &ipmr_dev_kill_list); - rhltable_destroy(&mrt->mfc_hash); - kfree(mrt); + timer_shutdown_sync(&mrt->ipmr_expire_timer); + mr_table_free(mrt); WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); list_splice(&ipmr_dev_kill_list, dev_kill_list); @@ -533,15 +537,16 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) }; int err; + rcu_read_lock(); err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { 
+ rcu_read_unlock(); kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); - rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), @@ -1108,11 +1113,12 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); - /* Add our header */ - igmp = skb_put(skb, sizeof(struct igmphdr)); + /* Add our header. + * Note that code, csum and group fields are cleared. + */ + igmp = skb_put_zero(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; - igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; } @@ -1135,12 +1141,19 @@ static int ipmr_cache_report(const struct mr_table *mrt, static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { + struct net *net = read_pnet(&mrt->net); const struct iphdr *iph = ip_hdr(skb); - struct mfc_cache *c; + struct mfc_cache *c = NULL; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); + + if (!check_net(net)) { + err = -EINVAL; + goto err; + } + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { @@ -1153,10 +1166,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Create a new entry if allowable */ c = ipmr_cache_alloc_unres(); if (!c) { - spin_unlock_bh(&mfc_unres_lock); - - kfree_skb(skb); - return -ENOBUFS; + err = -ENOBUFS; + goto err; } /* Fill in the new cache entry */ @@ -1166,17 +1177,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Reflect first query at mrouted. 
*/ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); - - if (err < 0) { - /* If the report failed throw the cache entry - out - Brad Parker - */ - spin_unlock_bh(&mfc_unres_lock); - - ipmr_cache_free(c); - kfree_skb(skb); - return err; - } + if (err < 0) + goto err; atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); @@ -1189,18 +1191,26 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { - kfree_skb(skb); + c = NULL; err = -ENOBUFS; - } else { - if (dev) { - skb->dev = dev; - skb->skb_iif = dev->ifindex; - } - skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); - err = 0; + goto err; } + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; + } + + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); + spin_unlock_bh(&mfc_unres_lock); + return 0; + +err: + spin_unlock_bh(&mfc_unres_lock); + if (c) + ipmr_cache_free(c); + kfree_skb(skb); return err; } @@ -1346,7 +1356,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, } if (flags & MRT_FLUSH_MFC) { - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 37a3c144276c..3930d612c3de 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -28,6 +28,20 @@ void vif_device_init(struct vif_device *v, v->link = dev->ifindex; } +static void __mr_free_table(struct work_struct *work) +{ + struct mr_table *mrt = container_of(to_rcu_work(work), + struct mr_table, work); + + rhltable_destroy(&mrt->mfc_hash); + kfree(mrt); +} + +void mr_table_free(struct mr_table *mrt) +{ + queue_rcu_work(system_unbound_wq, &mrt->work); +} + struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops 
*ops, @@ -50,6 +64,8 @@ mr_table_alloc(struct net *net, u32 id, kfree(mrt); return ERR_PTR(err); } + + INIT_RCU_WORK(&mrt->work, __mr_free_table); INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1cdd9c28ab2d..ad2259678c78 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -110,13 +110,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&src_ipaddr, arpptr, sizeof(u32)); arpptr += sizeof(u32); - tgt_devaddr = arpptr; - arpptr += dev->addr_len; + + if (IS_ENABLED(CONFIG_FIREWIRE_NET) && dev->type == ARPHRD_IEEE1394) { + if (unlikely(memchr_inv(arpinfo->tgt_devaddr.mask, 0, + sizeof(arpinfo->tgt_devaddr.mask)))) + return 0; + + tgt_devaddr = NULL; + } else { + tgt_devaddr = arpptr; + arpptr += dev->addr_len; + } memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, - dev->addr_len)) || + dev->addr_len))) + return 0; + + if (tgt_devaddr && NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len))) @@ -1489,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len static void __arpt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; + void *loc_cpu_entry; struct arpt_entry *iter; - private = xt_unregister_table(table); - /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) @@ -1503,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); 
xt_free_table_info(private); + kfree(table); } int arpt_register_table(struct net *net, @@ -1510,13 +1521,11 @@ int arpt_register_table(struct net *net, const struct arpt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1531,7 +1540,7 @@ int arpt_register_table(struct net *net, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct arpt_entry *iter; @@ -1541,46 +1550,12 @@ int arpt_register_table(struct net *net, return PTR_ERR(new_table); } - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __arpt_unregister_table(net, new_table); - return ret; -} - -void arpt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } -EXPORT_SYMBOL(arpt_unregister_table_pre_exit); void arpt_unregister_table(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name); if (table) __arpt_unregister_table(net, table); diff --git a/net/ipv4/netfilter/arpt_mangle.c 
b/net/ipv4/netfilter/arpt_mangle.c index a4e07e5e9c11..f65dd339208e 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -40,6 +40,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; @@ -47,6 +51,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 78cd5ee24448..370b635e3523 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net) static void __net_exit arptable_filter_net_pre_exit(struct net *net) { - arpt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter"); } static void __net_exit arptable_filter_net_exit(struct net *net) @@ -58,32 +58,33 @@ static struct pernet_operations arptable_filter_net_ops = { static int __init arptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - arptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); - if (IS_ERR(arpfilter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(arpfilter_ops)) return PTR_ERR(arpfilter_ops); - } ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + arptable_filter_table_init); if (ret < 0) { 
- xt_unregister_template(&packet_filter); - kfree(arpfilter_ops); - return ret; + unregister_pernet_subsys(&arptable_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(arpfilter_ops); return ret; } static void __exit arptable_filter_fini(void) { - unregister_pernet_subsys(&arptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); kfree(arpfilter_ops); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 23c8deff8095..5cbdb0815857 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ipt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ipt_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1718,19 +1716,18 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1745,7 +1742,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = 
xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ipt_entry *iter; @@ -1755,51 +1752,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - /* No template? No need to do anything. This is used by 'nat' table, it registers - * with the nat core instead of the netfilter core. - */ - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __ipt_unregister_table(net, new_table); - return ret; -} - -void ipt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ipt_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); @@ -1887,7 +1845,6 @@ static void __exit ip_tables_fini(void) } EXPORT_SYMBOL(ipt_register_table); -EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3ab908b74795..672d7da1071d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_pre_exit(struct net 
*net) { - ipt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) @@ -77,32 +77,33 @@ static struct pernet_operations iptable_filter_net_ops = { static int __init iptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - iptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + iptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&iptable_filter_net_ops); + goto err_free; } return 0; +err_free: + kfree(filter_ops); + return ret; } static void __exit iptable_filter_fini(void) { - unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 385d945d8ebe..13d25d9a4610 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net) static void __net_exit iptable_mangle_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle"); } static void __net_exit iptable_mangle_net_exit(struct net *net) @@ -111,32 +111,33 @@ static struct pernet_operations iptable_mangle_net_ops = { static int __init iptable_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - iptable_mangle_table_init); - if (ret < 0) - return ret; + int ret; 
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); - ret = PTR_ERR(mangle_ops); - return ret; - } + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&iptable_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit iptable_mangle_fini(void) { - unregister_pernet_subsys(&iptable_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index a5db7c67d61b..a0df72554025 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -79,7 +79,7 @@ static int ipt_nat_register_lookups(struct net *net) while (i) nf_nat_ipv4_unregister_fn(net, &ops[--i]); - kfree(ops); + kfree_rcu(ops, rcu); return ret; } } @@ -100,7 +100,7 @@ static void ipt_nat_unregister_lookups(struct net *net) for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) nf_nat_ipv4_unregister_fn(net, &ops[i]); - kfree(ops); + kfree_rcu(ops, rcu); } static int iptable_nat_table_init(struct net *net) @@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net) } ret = ipt_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); + synchronize_rcu(); ipt_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -129,6 +132,7 @@ static int iptable_nat_table_init(struct net *net) static void __net_exit iptable_nat_net_pre_exit(struct net *net) { ipt_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); 
} static void __net_exit iptable_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0e7f53964d0a..2745c22f4034 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net) static void __net_exit iptable_raw_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw"); } static void __net_exit iptable_raw_net_exit(struct net *net) @@ -77,32 +77,32 @@ static int __init iptable_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, - iptable_raw_table_init); - if (ret < 0) - return ret; - rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, + iptable_raw_table_init); if (ret < 0) { - xt_unregister_template(table); - kfree(rawtable_ops); - return ret; + unregister_pernet_subsys(&iptable_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit iptable_raw_fini(void) { + xt_unregister_template(&packet_raw); unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); - xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d885443cb267..491894511c54 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net) static void __net_exit iptable_security_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, 
"security"); } static void __net_exit iptable_security_net_exit(struct net *net) @@ -65,33 +65,34 @@ static struct pernet_operations iptable_security_net_ops = { static int __init iptable_security_init(void) { - int ret = xt_register_template(&security_table, - iptable_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + iptable_security_table_init); if (ret < 0) { - xt_unregister_template(&security_table); - kfree(sectbl_ops); - return ret; + unregister_pernet_subsys(&iptable_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit iptable_security_fini(void) { + xt_unregister_template(&security_table); unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); - xt_unregister_template(&security_table); } module_init(iptable_security_init); diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 5080fa5fbf6a..f9c6755f5ec5 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -94,6 +94,9 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, #endif int doff = 0; + if (ntohs(iph->frag_off) & IP_OFFSET) + return NULL; + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 904a060a7330..f92fcc39fc4c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2469,10 +2469,10 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, goto err_notify; } - /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially + /* When 
replacing a nexthop with one of a different family, potentially * update IPv4 indication in all the groups using the nexthop. */ - if (oldi->family == AF_INET && newi->family == AF_INET6) { + if (oldi->family != newi->family) { list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2014a6408e93..432fa28e47d4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3424,7 +3424,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto = TCP_TIMEOUT_INIT; WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX); - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; tp->is_cwnd_limited = 0; @@ -3622,7 +3622,8 @@ static void tcp_enable_tx_delay(struct sock *sk, int val) if (delta && sk->sk_state == TCP_ESTABLISHED) { s64 srtt = (s64)tp->srtt_us + delta; - tp->srtt_us = clamp_t(s64, srtt, 1, ~0U); + WRITE_ONCE(tp->srtt_us, + clamp_t(s64, srtt, 1, ~0U)); /* Note: does not deal with non zero icsk_backoff */ tcp_set_rto(sk); @@ -4190,12 +4191,18 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) { u64 stats[__TCP_CHRONO_MAX], total = 0; - enum tcp_chrono i; + enum tcp_chrono i, cur; + /* Following READ_ONCE()s pair with WRITE_ONCE()s in tcp_chrono_set(). + * This is because socket lock might not be owned by us at this point. + * This is best effort, tcp_get_timestamping_opt_stats() can + * see wrong values. A real fix would be too costly for TCP fast path. 
+ */ + cur = READ_ONCE(tp->chrono_type); for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { - stats[i] = tp->chrono_stat[i - 1]; - if (i == tp->chrono_type) - stats[i] += tcp_jiffies32 - tp->chrono_start; + stats[i] = READ_ONCE(tp->chrono_stat[i - 1]); + if (i == cur) + stats[i] += tcp_jiffies32 - READ_ONCE(tp->chrono_start); stats[i] *= USEC_PER_SEC / HZ; total += stats[i]; } @@ -4427,9 +4434,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, info.tcpi_sndbuf_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, - tp->data_segs_out, TCP_NLA_PAD); + READ_ONCE(tp->data_segs_out), TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, - tp->total_retrans, TCP_NLA_PAD); + READ_ONCE(tp->total_retrans), TCP_NLA_PAD); rate = READ_ONCE(sk->sk_pacing_rate); rate64 = (rate != ~0UL) ? rate : ~0ULL; @@ -4438,37 +4445,42 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); - nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); - nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); + nla_put_u32(stats, TCP_NLA_SND_CWND, READ_ONCE(tp->snd_cwnd)); + nla_put_u32(stats, TCP_NLA_REORDERING, READ_ONCE(tp->reordering)); + nla_put_u32(stats, TCP_NLA_MIN_RTT, data_race(tcp_min_rtt(tp))); nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, READ_ONCE(inet_csk(sk)->icsk_retransmits)); - nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); - nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); - nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); - nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); - - nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited)); + nla_put_u32(stats, 
TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh)); + nla_put_u32(stats, TCP_NLA_DELIVERED, READ_ONCE(tp->delivered)); + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, READ_ONCE(tp->delivered_ce)); + + nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, + max_t(int, 0, + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una))); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); - nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, - TCP_NLA_PAD); - nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, + nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, READ_ONCE(tp->bytes_sent), TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); - nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); - nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); - nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); + nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, + READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); + nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); + nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen)); + nla_put_u32(stats, TCP_NLA_SRTT, READ_ONCE(tp->srtt_us) >> 3); + nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, + READ_ONCE(tp->timeout_rehash)); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, - max_t(int, 0, tp->write_seq - tp->snd_nxt)); + max_t(int, 0, + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt))); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); if (ack_skb) nla_put_u8(stats, TCP_NLA_TTL, tcp_skb_ttl_or_hop_limit(ack_skb)); - nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); + nla_put_u32(stats, TCP_NLA_REHASH, + READ_ONCE(tp->plb_rehash) + READ_ONCE(tp->timeout_rehash)); return stats; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index a97cdf3e6af4..0a4b38b315fe 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, { struct tcp_ao_key *key; - 
hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { + hlist_for_each_entry_rcu(key, &ao->head, node, + sk_fullsock(sk) && lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 1ddc20a399b0..aec7805b1d37 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -897,8 +897,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ - tcp_sk(sk)->snd_ssthresh = - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= @@ -1043,7 +1043,7 @@ __bpf_kfunc static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); bbr->rtt_cnt = 0; bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 58358bf92e1b..65444ff14241 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -74,7 +74,7 @@ static void bictcp_init(struct sock *sk) bictcp_reset(ca); if (initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); } /* diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index ceabfd690a29..0812c390aee5 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -162,7 +162,7 @@ static void tcp_cdg_hystart_update(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); return; } } 
@@ -181,7 +181,7 @@ static void tcp_cdg_hystart_update(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ab78b5ae8d0e..119bf8cbb007 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -136,7 +136,7 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk) bictcp_hystart_reset(sk); if (!hystart && initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); } __bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) @@ -420,7 +420,7 @@ static void hystart_update(struct sock *sk, u32 delay) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } @@ -440,7 +440,7 @@ static void hystart_update(struct sock *sk, u32 delay) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 96c99999e09d..274e628e7cf8 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -177,7 +177,7 @@ static void dctcp_react_to_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tcp_snd_cwnd(tp); - tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); + WRITE_ONCE(tp->snd_ssthresh, max(tcp_snd_cwnd(tp) >> 1U, 2U)); } __bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 021f745747c5..d5c9e65d9760 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -476,14 +476,14 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp, static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) { - 
tp->delivered_ce += ecn_count; + WRITE_ONCE(tp->delivered_ce, tp->delivered_ce + ecn_count); } /* Updates the delivered and delivered_ce counts */ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, bool ece_ack) { - tp->delivered += delivered; + WRITE_ONCE(tp->delivered, tp->delivered + delivered); if (tcp_ecn_mode_rfc3168(tp) && ece_ack) tcp_count_delivered_ce(tp, delivered); } @@ -1132,7 +1132,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tcp_bpf_rtt(sk, mrtt_us, srtt); } - tp->srtt_us = max(1U, srtt); + WRITE_ONCE(tp->srtt_us, max(1U, srtt)); } void tcp_update_pacing_rate(struct sock *sk) @@ -1246,7 +1246,7 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) state->flag |= FLAG_DSACK_TLP; - tp->dsack_dups += dup_segs; + WRITE_ONCE(tp->dsack_dups, tp->dsack_dups + dup_segs); /* Skip the DSACK if dup segs weren't retransmitted by sender */ if (tp->dsack_dups > tp->total_retrans) return 0; @@ -1293,12 +1293,13 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - tp->reordering = min_t(u32, (metric + mss - 1) / mss, - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); + WRITE_ONCE(tp->reordering, + min_t(u32, (metric + mss - 1) / mss, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); } /* This exciting event is worth to be remembered. 8) */ - tp->reord_seen++; + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); NET_INC_STATS(sock_net(sk), ts ? 
LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } @@ -2439,9 +2440,10 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) if (!tcp_limit_reno_sacked(tp)) return; - tp->reordering = min_t(u32, tp->packets_out + addend, - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); - tp->reord_seen++; + WRITE_ONCE(tp->reordering, + min_t(u32, tp->packets_out + addend, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2565,7 +2567,7 @@ void tcp_enter_loss(struct sock *sk) (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->prior_cwnd = tcp_snd_cwnd(tp); - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, icsk->icsk_ca_ops->ssthresh(sk)); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); } @@ -2579,8 +2581,8 @@ void tcp_enter_loss(struct sock *sk) reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && tp->sacked_out >= reordering) - tp->reordering = min_t(unsigned int, tp->reordering, - reordering); + WRITE_ONCE(tp->reordering, + min_t(unsigned int, tp->reordering, reordering)); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; @@ -2858,7 +2860,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); if (tp->prior_ssthresh > tp->snd_ssthresh) { - tp->snd_ssthresh = tp->prior_ssthresh; + WRITE_ONCE(tp->snd_ssthresh, tp->prior_ssthresh); tcp_ecn_withdraw_cwr(tp); } } @@ -2976,7 +2978,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tp->prior_cwnd = tcp_snd_cwnd(tp); tp->prr_delivered = 0; tp->prr_out = 0; - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, inet_csk(sk)->icsk_ca_ops->ssthresh(sk)); tcp_ecn_queue_cwr(tp); } @@ -3118,7 +3120,7 @@ 
static void tcp_non_congestion_loss_retransmit(struct sock *sk) if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); tp->prior_ssthresh = 0; tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Loss); @@ -3910,7 +3912,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) sock_owned_by_me((struct sock *)tp); tp->bytes_acked += delta; tcp_snd_sne_update(tp, ack); - tp->snd_una = ack; + WRITE_ONCE(tp->snd_una, ack); } static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) @@ -4284,11 +4286,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto old_ack; } - /* If the ack includes data we haven't sent yet, discard - * this segment (RFC793 Section 3.9). + /* If the ack includes data we haven't sent yet, drop the + * segment. RFC 793 Section 3.9 and RFC 5961 Section 5.2 + * require us to send an ACK back in that case. */ - if (after(ack, tp->snd_nxt)) + if (after(ack, tp->snd_nxt)) { + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, false); return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA; + } if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; @@ -6777,7 +6783,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); /* SYN-data is counted as two separate packets in tcp_ack() */ if (tp->delivered > 1) - --tp->delivered; + WRITE_ONCE(tp->delivered, tp->delivered - 1); } tcp_fastopen_add_skb(sk, synack); @@ -7210,7 +7216,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) SKB_DR_SET(reason, NOT_SPECIFIED); switch (sk->sk_state) { case TCP_SYN_RECV: - tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ + WRITE_ONCE(tp->delivered, tp->delivered + 1); /* SYN-ACK delivery isn't tracked in tcp_ack */ if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -7238,7 +7244,7 @@ 
tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_socket) sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + WRITE_ONCE(tp->snd_una, TCP_SKB_CB(skb)->ack_seq); tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8fc24c3743c5..c0526cc03980 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1827,7 +1827,6 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; - struct sock *rsk; reason = psp_sk_rx_policy_check(sk, skb); if (reason) @@ -1863,24 +1862,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; if (nsk != sk) { reason = tcp_child_process(sk, nsk, skb); - if (reason) { - rsk = nsk; + sock_put(nsk); + if (reason) goto reset; - } return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); - if (reason) { - rsk = sk; + if (reason) goto reset; - } return 0; reset: - tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); + tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: sk_skb_reason_drop(sk, skb, reason); /* Be careful here. If this function gets more complicated and @@ -2193,8 +2189,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v4_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 06b1d5d3b6df..dc0c081fc1f3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -490,13 +490,13 @@ void tcp_init_metrics(struct sock *sk) val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { - tp->snd_ssthresh = val; + WRITE_ONCE(tp->snd_ssthresh, val); if (tp->snd_ssthresh > tp->snd_cwnd_clamp) - tp->snd_ssthresh = tp->snd_cwnd_clamp; + WRITE_ONCE(tp->snd_ssthresh, tp->snd_cwnd_clamp); } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) - tp->reordering = val; + WRITE_ONCE(tp->reordering, val); crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 199f0b579e89..e6092c3ac840 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -1012,6 +1012,6 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, } bh_unlock_sock(child); - sock_put(child); + return reason; } diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index a60662f4bdf9..f345897a68df 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -396,8 +396,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) /* We have enough data to determine we are congested */ ca->nv_allow_cwnd_growth = 0; - tp->snd_ssthresh = - (nv_ssthresh_factor * max_win) >> 3; + WRITE_ONCE(tp->snd_ssthresh, + (nv_ssthresh_factor * max_win) >> 3); if (tcp_snd_cwnd(tp) - max_win > 2) { /* gap > 2, we do exponential cwnd decrease */ int dec; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e99687526a6..f9d8755705f7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -171,7 +171,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) tcp_ca_event(sk, CA_EVENT_CWND_RESTART); - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) @@ -1688,8 +1688,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); - tp->data_segs_out += 
tcp_skb_pcount(skb); - tp->bytes_sent += skb->len - tcp_header_size; + WRITE_ONCE(tp->data_segs_out, + tp->data_segs_out + tcp_skb_pcount(skb)); + WRITE_ONCE(tp->bytes_sent, + tp->bytes_sent + skb->len - tcp_header_size); } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) @@ -2142,7 +2144,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tcp_snd_cwnd(tp)) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1); } tp->snd_cwnd_used = 0; @@ -3642,8 +3644,8 @@ start: TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans += segs; - tp->bytes_retrans += skb->len; + WRITE_ONCE(tp->total_retrans, tp->total_retrans + segs); + WRITE_ONCE(tp->bytes_retrans, tp->bytes_retrans + skb->len); /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom @@ -4152,7 +4154,7 @@ static void tcp_connect_init(struct sock *sk) tp->snd_wnd = 0; tcp_init_wl(tp, 0); tcp_write_queue_purge(sk); - tp->snd_una = tp->write_seq; + WRITE_ONCE(tp->snd_una, tp->write_seq); tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; WRITE_ONCE(tp->snd_nxt, tp->write_seq); @@ -4646,7 +4648,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) * However in this case, we are dealing with a passive fastopen * socket thus we can change total_retrans value. 
*/ - tcp_sk_rw(sk)->total_retrans++; + WRITE_ONCE(tcp_sk_rw(sk)->total_retrans, + tcp_sk_rw(sk)->total_retrans + 1); } trace_tcp_retransmit_synack(sk, req); WRITE_ONCE(req->num_retrans, req->num_retrans + 1); diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c index 68ccdb9a5412..c11a0cd3f8fe 100644 --- a/net/ipv4/tcp_plb.c +++ b/net/ipv4/tcp_plb.c @@ -80,7 +80,7 @@ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) sk_rethink_txhash(sk); plb->consec_cong_rounds = 0; - tcp_sk(sk)->plb_rehash++; + WRITE_ONCE(tcp_sk(sk)->plb_rehash, tcp_sk(sk)->plb_rehash + 1); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); } EXPORT_SYMBOL_GPL(tcp_plb_check_rehash); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ea99988795e7..322db13333c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,7 +50,8 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { const struct inet_connection_sock *icsk = inet_csk(sk); - u32 remaining, user_timeout; + u32 user_timeout; + s32 remaining; s32 elapsed; user_timeout = READ_ONCE(icsk->icsk_user_timeout); @@ -61,7 +62,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) if (unlikely(elapsed < 0)) elapsed = 0; remaining = msecs_to_jiffies(user_timeout) - elapsed; - remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + remaining = max_t(int, remaining, TCP_TIMEOUT_MIN); return min_t(u32, remaining, when); } @@ -297,7 +298,7 @@ static int tcp_write_timeout(struct sock *sk) } if (sk_rethink_txhash(sk)) { - tp->timeout_rehash++; + WRITE_ONCE(tp->timeout_rehash, tp->timeout_rehash + 1); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 950a66966059..574453af6bc0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -245,7 +245,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ 
tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), (u32)target_cwnd + 1)); - tp->snd_ssthresh = tcp_vegas_ssthresh(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_vegas_ssthresh(tp)); } else if (tcp_in_slow_start(tp)) { /* Slow start. */ @@ -261,8 +262,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * we slow down. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); - tp->snd_ssthresh - = tcp_vegas_ssthresh(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_vegas_ssthresh(tp)); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. @@ -280,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); } /* Wipe the slate clean for the next RTT. */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index c6e97141eef2..b5a42adfd6ca 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -244,11 +244,11 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) switch (event) { case CA_EVENT_COMPLETE_CWR: - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); tcp_snd_cwnd_set(tp, tp->snd_ssthresh); break; case CA_EVENT_LOSS: - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; break; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index b22b3dccd05e..9e581154f18f 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -147,7 +147,8 @@ do_vegas: tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), yeah->reno_count)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_snd_cwnd(tp)); } if (yeah->reno_count <= 2) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 
c024aa77f25b..c3806c6ac96f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -164,7 +164,7 @@ config IPV6_SIT select INET_TUNNEL select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE - default y + default m help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -172,7 +172,7 @@ config IPV6_SIT into IPv4 packets. This is useful if you want to connect two IPv6 networks over an IPv4-only path. - Saying M here will produce a module called sit. If unsure, say Y. + Saying M here will produce a module called sit. If unsure, say M. config IPV6_SIT_6RD bool "IPv6: IPv6 Rapid Deployment (6RD)" diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index cb26beea4398..de1e68199a01 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err) struct ipv6hdr *top_iph = ipv6_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); int ah_hlen = ipv6_authlen(ah); + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(seqhi, 
seqhi_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313734f8..9c06c5a1419d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 95558fd6f447..03cbce842c1a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -491,6 +491,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; + unsigned int chdr_len; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -592,8 +593,10 @@ looped_back: skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); - if (unlikely(!hdr->segments_left)) { - if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + chdr_len = sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3); + if (unlikely(!hdr->segments_left || + skb_headroom(skb) < chdr_len + skb->mac_len)) { + if (pskb_expand_head(skb, chdr_len + skb->mac_len, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -603,7 +606,7 @@ looped_back: oldhdr = ipv6_hdr(skb); } - skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); + skb_push(skb, chdr_len); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 49e31e4ae7b7..9d06d487e8b1 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -73,6 +73,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 
*frag_offp) { u8 nexthdr = *nexthdrp; + int exthdr_cnt = 0; *frag_offp = 0; @@ -82,6 +83,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -1; @@ -190,6 +193,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int exthdr_cnt = 0; bool found; if (fragoff) @@ -216,6 +220,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -EBADMSG; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -EBADMSG; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 799d9e9ac45d..efb23807a026 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1104,7 +1104,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); - const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; @@ -1135,12 +1134,10 @@ static int icmpv6_rcv(struct sk_buff *skb) __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); + &ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr); goto csum_error; } @@ -1220,7 +1217,8 @@ static int icmpv6_rcv(struct sk_buff *skb) break; net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", - saddr, daddr); + &ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr); /* * error of unknown type. 
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c92f98c6f6ec..b1ccdf0dc646 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -36,11 +36,11 @@ /* FL hash table */ #define FL_MAX_PER_SOCK 32 -#define FL_MAX_SIZE 4096 +#define FL_MAX_SIZE 8192 #define FL_HASH_MASK 255 #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) -static atomic_t fl_size = ATOMIC_INIT(0); +static int fl_size; static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(struct timer_list *unused); @@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused) ttd = fl->expires; if (time_after_eq(now, ttd)) { *flp = fl->next; + fl_size--; + fl->fl_net->ipv6.flowlabel_count--; fl_free(fl); - atomic_dec(&fl_size); continue; } if (!sched || time_before(ttd, sched)) @@ -172,7 +173,7 @@ static void ip6_fl_gc(struct timer_list *unused) flp = &fl->next; } } - if (!sched && atomic_read(&fl_size)) + if (!sched && fl_size) sched = now + FL_MAX_LINGER; if (sched) { mod_timer(&ip6_fl_gc_timer, sched); @@ -196,7 +197,8 @@ static void __net_exit ip6_fl_purge(struct net *net) atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); - atomic_dec(&fl_size); + fl_size--; + net->ipv6.flowlabel_count--; continue; } flp = &fl->next; @@ -210,10 +212,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net, { struct ip6_flowlabel *lfl; + lockdep_assert_held(&ip6_fl_lock); + fl->label = label & IPV6_FLOWLABEL_MASK; - rcu_read_lock(); - spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; @@ -235,8 +237,6 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl) { atomic_inc(&lfl->users); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); return lfl; } } @@ -244,9 +244,8 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); - 
atomic_inc(&fl_size); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); + fl_size++; + net->ipv6.flowlabel_count++; return NULL; } @@ -464,10 +463,17 @@ done: static int mem_check(struct sock *sk) { - int room = FL_MAX_SIZE - atomic_read(&fl_size); + const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4); + const int unpriv_user_limit = unpriv_total_limit / 2; + struct net *net = sock_net(sk); + int room; struct ipv6_fl_socklist *sfl; int count = 0; + lockdep_assert_held(&ip6_fl_lock); + + room = FL_MAX_SIZE - fl_size; + if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; @@ -478,7 +484,9 @@ static int mem_check(struct sock *sk) if (room <= 0 || ((count >= FL_MAX_PER_SOCK || - (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && + (count > 0 && room < FL_MAX_SIZE / 2) || + room < FL_MAX_SIZE / 4 || + net->ipv6.flowlabel_count >= unpriv_user_limit) && !capable(CAP_NET_ADMIN))) return -ENOBUFS; @@ -692,11 +700,19 @@ release: if (!sfl1) goto done; + rcu_read_lock(); + spin_lock_bh(&ip6_fl_lock); err = mem_check(sk); + if (err == 0) + fl1 = fl_intern(net, fl, freq->flr_label); + else + fl1 = NULL; + spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); + if (err != 0) goto done; - fl1 = fl_intern(net, fl, freq->flr_label); if (fl1) goto recheck; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 63fc8556b475..365b4059eb20 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2262,10 +2262,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct ip6_tnl *t; + struct ip6gre_net *ign; + ign = net_generic(t->net, ip6gre_net_id); t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 967b07aeb683..8972863c93ee 100644 --- 
a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -403,6 +403,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final) { + int exthdr_cnt = IP6CB(skb)->flags & IP6SKB_HOPBYHOP ? 1 : 0; const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -487,6 +488,10 @@ resubmit_final: nexthdr = ret; goto resubmit_final; } else { + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) { + SKB_DR_SET(reason, IPV6_TOO_MANY_EXTHDRS); + goto discard; + } goto resubmit; } } else if (ret == 0) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e92909ab5be..c14adcdd4396 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -468,6 +468,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) default: break; } + hdr = ipv6_hdr(skb); } /* @@ -582,6 +583,8 @@ int ip6_forward(struct sk_buff *skb) if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) { int proxied = ip6_forward_proxy_check(skb); + + hdr = ipv6_hdr(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the @@ -1794,6 +1797,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46bc06506470..9d1037ac082f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -62,6 +62,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); +#define IP6_TUNNEL_MAX_DEST_TLVS 8 + #define IP6_TUNNEL_HASH_SIZE_SHIFT 5 #define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) @@ -397,11 +399,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) unsigned int nhoff = raw - skb->data; unsigned 
int off = nhoff + sizeof(*ipv6h); u8 nexthdr = ipv6h->nexthdr; + int exthdr_cnt = 0; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + break; + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; @@ -425,11 +431,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) break; } if (nexthdr == NEXTHDR_DEST) { + int tlv_cnt = 0; u16 i = 2; while (1) { struct ipv6_tlv_tnl_enc_lim *tel; + if (unlikely(tlv_cnt++ >= IP6_TUNNEL_MAX_DEST_TLVS)) + break; + /* No more room for encapsulation limit */ if (i + sizeof(*tel) > optlen) break; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d585ac3c1113..9d9c3763f2f5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ip6t_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1727,19 +1725,18 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = 
xt_alloc_table_info(repl->size); if (!newinfo) @@ -1754,7 +1751,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; @@ -1764,48 +1761,12 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __ip6t_unregister_table(net, new_table); - return ret; -} - -void ip6t_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ip6t_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); @@ -1894,7 +1855,6 @@ static void __exit ip6_tables_fini(void) } EXPORT_SYMBOL(ip6t_register_table); -EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index e8992693e14a..b074fc477676 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,7 +60,7 @@ static int __net_init 
ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) @@ -76,32 +76,32 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret = xt_register_template(&packet_filter, - ip6table_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&ip6table_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&ip6table_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(filter_ops); return ret; } static void __exit ip6table_filter_fini(void) { - unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8dd4cd0c47bd..e6ee036a9b2c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net) static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) @@ -104,32 +104,33 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { 
- int ret = xt_register_template(&packet_mangler, - ip6table_mangle_table_init); - - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); + if (IS_ERR(mangle_ops)) return PTR_ERR(mangle_ops); - } ret = register_pernet_subsys(&ip6table_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&ip6table_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit ip6table_mangle_fini(void) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index e119d4f090cc..c2394e2c94b5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -81,7 +81,7 @@ static int ip6t_nat_register_lookups(struct net *net) while (i) nf_nat_ipv6_unregister_fn(net, &ops[--i]); - kfree(ops); + kfree_rcu(ops, rcu); return ret; } } @@ -102,7 +102,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) nf_nat_ipv6_unregister_fn(net, &ops[i]); - kfree(ops); + kfree_rcu(ops, rcu); } static int ip6table_nat_table_init(struct net *net) @@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net) } ret = ip6t_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); + synchronize_rcu(); ip6t_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -131,6 +134,7 @@ static int ip6table_nat_table_init(struct net *net) static void __net_exit ip6table_nat_net_pre_exit(struct net 
*net) { ip6t_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); } static void __net_exit ip6table_nat_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fc9f6754028f..3b161ee875bc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net) static void __net_exit ip6table_raw_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw"); } static void __net_exit ip6table_raw_net_exit(struct net *net) @@ -75,31 +75,31 @@ static int __init ip6table_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, ip6table_raw_table_init); - if (ret < 0) - return ret; - /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&ip6table_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, ip6table_raw_table_init); if (ret < 0) { - kfree(rawtable_ops); - xt_unregister_template(table); - return ret; + unregister_pernet_subsys(&ip6table_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit ip6table_raw_fini(void) { - unregister_pernet_subsys(&ip6table_raw_net_ops); xt_unregister_template(&packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 4df14a9bae78..4bd5d97b8ab6 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net) static void __net_exit 
ip6table_security_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security"); } static void __net_exit ip6table_security_net_exit(struct net *net) @@ -64,32 +64,33 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret = xt_register_template(&security_table, - ip6table_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + ip6table_security_table_init); if (ret < 0) { - kfree(sectbl_ops); - xt_unregister_template(&security_table); - return ret; + unregister_pernet_subsys(&ip6table_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit ip6table_security_fini(void) { - unregister_pernet_subsys(&ip6table_security_net_ops); xt_unregister_template(&security_table); + unregister_pernet_subsys(&ip6table_security_net_ops); kfree(sectbl_ops); } diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index ced8bd44828e..893f2aeb4711 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -100,6 +100,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; + unsigned short fragoff = 0; int doff = 0; int thoff = 0, tproto; #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -107,8 +108,8 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, struct nf_conn const *ct; #endif - tproto = 
ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 19eb6b702227..e3d355d1fbd6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1645,6 +1645,10 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); + if (!idev) { + rcu_read_unlock(); + return 0; + } mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -4995,6 +4999,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); break; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index c7942cf65567..4e10adcd70e8 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -287,7 +287,16 @@ static int rpl_input(struct sk_buff *skb) if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 97b50d9b1365..4c45c0a77d75 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -515,7 +515,16 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. 
+ */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { @@ -746,7 +755,8 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, newts->type = LWTUNNEL_ENCAP_SEG6; newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; - if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) + if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP && + tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP_RED) newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; newts->headroom = seg6_lwt_headroom(tuninfo); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c3f7a739709..d13d49bfef19 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -288,8 +288,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, saddr = &fl6->saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); - if (err) + if (err) { + dst_release(dst); goto failure; + } } /* set the source address */ @@ -1617,12 +1619,13 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) + return 0; if (nsk != sk) { - if (nsk) { - reason = tcp_child_process(sk, nsk, skb); - if (reason) - goto reset; - } + reason = tcp_child_process(sk, nsk, skb); + sock_put(nsk); + if (reason) + goto reset; return 0; } } else @@ -1827,8 +1830,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805d3b01..9b586fcec485 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); - if (dst->error) + if 
(dst->error) { + dst_release(dst); goto drop; + } skb_dst_set(skb, dst); } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 59d593bb5d18..1b210db3119e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -520,8 +520,10 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr_unsized *uaddr, if (sk->sk_state == TCP_SYN_SENT) { const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) + if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) { + rc = -EINPROGRESS; goto out; + } rc = sock_intr_errno(timeo); if (signal_pending(current)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 160ae65a5c64..0a0f27836d57 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -438,6 +438,15 @@ ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); /* + * Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in + * their basic HT-MCS set. On cards with lower spatial streams, the check + * would fail, and we'd be stuck with no HT when it in fact work fine with + * its own supported rate. So check it only in strict mode. + */ + if (!ieee80211_hw_check(&sdata->local->hw, STRICT)) + return true; + + /* * P802.11REVme/D7.0 - 6.5.4.2.4 * ... 
* If the MLME of an HT STA receives an MLME-JOIN.request primitive @@ -9140,7 +9149,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_link_data *link; - bool have_sta = false; + struct sta_info *have_sta = NULL; bool mlo; int err; u16 new_links; @@ -9159,11 +9168,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = false; } - if (assoc) { - rcu_read_lock(); + if (assoc) have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } if (mlo && !have_sta && WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) @@ -9327,6 +9333,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_release_chan: ieee80211_link_release_channel(link); out_err: + if (mlo && have_sta) + WARN_ON(__sta_info_destroy(have_sta)); ieee80211_vif_set_links(sdata, 0, 0); return err; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3e5d1c47a5b0..d18e962126ce 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4971,7 +4971,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - static ieee80211_rx_result res; + ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -5380,7 +5380,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - ieee80211_rx_data_set_link(&rx, link_sta->link_id); + if (!ieee80211_rx_data_set_link(&rx, + link_sta->link_id)) + goto out; } if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index adc069065e73..fa370831d617 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c @@ -65,6 +65,7 @@ 
static const struct determine_chan_mode_case { .ht_capa_mask = { .mcs.rx_mask[0] = 0xf7, }, + .strict = true, }, { .desc = "Masking out a RX rate in VHT capabilities", .conn_mode = IEEE80211_CONN_MODE_EHT, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b093bc203c81..2529b01e2cd5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3700,11 +3700,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); struct cfg80211_chan_def chandef; - struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *ctx, *tmp; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { + list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; diff --git a/net/mctp/route.c b/net/mctp/route.c index 26fb8c6bbad2..1f3dccbb7aed 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -441,6 +441,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) unsigned long f; u8 tag, flags; int rc; + u8 ver; msk = NULL; rc = -EINVAL; @@ -467,7 +468,8 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) netid = mctp_cb(skb)->net; skb_pull(skb, sizeof(struct mctp_hdr)); - if (mh->ver != 1) + ver = mh->ver & MCTP_HDR_VER_MASK; + if (ver < MCTP_VER_MIN || ver > MCTP_VER_MAX) goto out; flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); @@ -1317,6 +1319,7 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, struct mctp_dst dst; struct mctp_hdr *mh; int rc; + u8 ver; rcu_read_lock(); mdev = __mctp_dev_get(dev); @@ -1334,7 +1337,8 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, /* We have enough for a header; decode and route */ mh = mctp_hdr(skb); - if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) + ver = mh->ver & MCTP_HDR_VER_MASK; + if (ver < 
MCTP_VER_MIN || ver > MCTP_VER_MAX) goto err_drop; /* source must be valid unicast or null; drop reserved ranges and diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index e1033643fab0..e4b230ef6099 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -920,9 +920,9 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) static void mctp_test_route_input_null_eid(struct kunit *test) { struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sockaddr_mctp addr = { 0 }; struct sk_buff *skb_pkt, *skb_sk; struct mctp_test_dev *dev; - struct sockaddr_mctp addr; struct socket *sock; u8 type = 0; int rc; diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index c3987d5ade7a..6eef8d485c25 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -116,7 +116,7 @@ void mctp_test_destroy_dev(struct mctp_test_dev *dev) static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { skb->dev = dst->dev->dev; - dev_queue_xmit(skb); + dev_direct_xmit(skb, 0); return 0; } diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2..4cc16cbeb328 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -14,7 +14,7 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk)) return mptcp_sk(mptcp_subflow_ctx(sk)->conn); return NULL; diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 82ec15bcfd7f..082c46c0f50e 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + bool has_rxtstamp; /* on early fallback the subflow context is deleted by * subflow_syn_recv_sock() @@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context 
*subf */ tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; - MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 57a456690406..3c152bf66cd5 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; + bool timer_done; struct timer_list add_timer; struct mptcp_sock *sock; struct rcu_head rcu; @@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; + if (!__mptcp_subflow_active(subflow)) + continue; + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) const struct net *net = sock_net((struct sock *)msk); unsigned int rto = mptcp_get_add_addr_timeout(net); struct mptcp_subflow_context *subflow; - unsigned int max = 0; + unsigned int max = 0, max_stale = 0; + + if (!rto) + return 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); - if (icsk->icsk_rto > max) + if (!__mptcp_subflow_active(subflow)) + continue; + + if (unlikely(subflow->stale)) { + if (icsk->icsk_rto > max_stale) + max_stale = icsk->icsk_rto; + } else if (icsk->icsk_rto > max) { max = icsk->icsk_rto; + } } - if (max && max < rto) - rto = max; + if (max) + return min(max, rto); + + if (max_stale) + return min(max_stale, rto); return rto; } @@ -327,26 +344,22 @@ static void 
mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - unsigned int timeout; + unsigned int timeout = 0; pr_debug("msk=%p\n", msk); - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - if (!entry->addr.id) - return; + bh_lock_sock(sk); + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + goto out; - if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + timeout = HZ / 20; goto out; } timeout = mptcp_adjust_add_addr_timeout(msk); - if (!timeout) + if (!timeout || mptcp_pm_should_add_signal_addr(msk)) goto out; spin_lock_bh(&msk->pm.lock); @@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + (timeout << entry->retrans_times)); + timeout <<= entry->retrans_times; + else + timeout = 0; spin_unlock_bh(&msk->pm.lock); @@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: - __sock_put(sk); + if (timeout) + sk_reset_timer(sk, timer, jiffies + timeout); + else + /* if sock_put calls sk_free: avoid waiting for this timer */ + entry->timer_done = true; + bh_unlock_sock(sk); + sock_put(sk); } struct mptcp_pm_add_entry * @@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: + add_entry->timer_done = false; timeout = mptcp_adjust_add_addr_timeout(msk); if (timeout) sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); @@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); + if (!entry->timer_done) + sk_stop_timer_sync(sk, 
&entry->add_timer); kfree_rcu(entry, rcu); } } diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 0ebf43be9939..fc818b63752e 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < endp_signal_max) { + u8 endp_id; + /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (!select_signal_address(pernet, msk, &local)) goto subflow; + /* Special case for ID0: set the correct ID */ + endp_id = local.addr.id; + if (endp_id == msk->mpc_endpoint_id) + local.addr.id = 0; + /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + __clear_bit(endp_id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_addr_send_ack(msk); @@ -1278,6 +1281,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) WRITE_ONCE(pernet->endp_signal_max, 0); WRITE_ONCE(pernet->endp_subflow_max, 0); WRITE_ONCE(pernet->endp_laminar_max, 0); + WRITE_ONCE(pernet->endp_fullmesh_max, 0); pernet->endpoints = 0; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index fbffd3a43fe8..4546a8b09884 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3302,7 +3302,8 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_data_avail(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0 || + (sock_flag(sk, SOCK_LINGER) && 
!sk->sk_lingertime)) { /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ @@ -3594,7 +3595,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(nsk, ssk); - __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); msk->rcvq_space.time = mptcp_stamp(); @@ -4252,6 +4252,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_graft_subflows(newsk); mptcp_rps_record_subflows(msk); + __mptcp_propagate_sndbuf(newsk, mptcp_subflow_tcp_sock(subflow)); /* Do late cleanup for the first subflow as necessary. Also * deal with bad peers not doing a complete shutdown. diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index de90a2897d2d..1cf608e7357b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); - sock_set_timestamp(sk, optname, !!val); - unlock_sock_fast(ssk, slow); + lock_sock(ssk); + sock_set_timestamp(ssk, optname, !!val); + release_sock(ssk); } release_sock(sk); @@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, timestamping); - unlock_sock_fast(ssk, slow); + lock_sock(ssk); + sock_set_timestamping(ssk, optname, timestamping); + release_sock(ssk); } release_sock(sk); @@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, if (ret) break; } + + if (!ret) + sockopt_seq_inc(msk); + return ret; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e2cb9d23e4a0..d562e149606f 100644 --- a/net/mptcp/subflow.c 
+++ b/net/mptcp/subflow.c @@ -581,7 +581,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup); if (!subflow_thmac_valid(subflow)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -908,7 +908,7 @@ create_child: if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2082bfb2d93c..9ea6b4fa78bf 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -267,27 +267,20 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash_key2 = hash_key; use2 = false; } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, &head, &head2); - spin_lock(&cp->lock); - - if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - cp->flags |= IP_VS_CONN_F_HASHED; - WRITE_ONCE(cp->hn0.hash_key, hash_key); - WRITE_ONCE(cp->hn1.hash_key, hash_key2); - refcount_inc(&cp->refcnt); - hlist_bl_add_head_rcu(&cp->hn0.node, head); - if (use2) - hlist_bl_add_head_rcu(&cp->hn1.node, head2); - ret = 1; - } else { - pr_err("%s(): request for already hashed, called from %pS\n", - __func__, __builtin_return_address(0)); - ret = 0; - } - spin_unlock(&cp->lock); + cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); + refcount_inc(&cp->refcnt); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); + conn_tab_unlock(head, head2); + ret = 1; /* Schedule resizing if load increases */ if (atomic_read(&ipvs->conn_count) > t->u_thresh && @@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) conn_tab_lock(t, cp, 
hash_key, hash_key2, use2, false /* new_hash */, &head, &head2); - spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ @@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) } } - spin_unlock(&cp->lock); conn_tab_unlock(head, head2); rcu_read_unlock(); @@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) struct ip_vs_conn_hnode *hn; u32 hash_key, hash_key_new; struct ip_vs_conn_param p; + bool by_me = false; int ntbl; int dir; @@ -664,8 +656,16 @@ retry: t = rcu_dereference(t->new_tbl); ntbl++; /* We are lost? */ - if (ntbl >= 2) + if (ntbl >= 2) { + spin_lock_bh(&cp->lock); + if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me) + cp->cport = 0; + /* hn1 will be rehashed on next packet */ + spin_unlock_bh(&cp->lock); + IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n", + __func__, dir); return; + } } /* Rehashing during resize? Use the recent table for adds */ @@ -683,10 +683,13 @@ retry: if (head > head2 && t == t2) swap(head, head2); + /* Protect the cp->flags modification */ + spin_lock_bh(&cp->lock); + /* Lock seqcount only for the old bucket, even if we are on new table * because it affects the del operation, not the adding. 
*/ - spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_lock(&t->lock[hash_key & t->lock_mask].l); preempt_disable_nested(); write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); @@ -704,14 +707,23 @@ retry: hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + spin_unlock_bh(&cp->lock); hash_key = hash_key_new; goto retry; } - spin_lock(&cp->lock); - if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && - (cp->flags & IP_VS_CONN_F_HASHED)) { + /* Fill cport once, even if multiple packets try to do it */ + if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) { + /* If we race with resizing make sure cport is set for dir 1 */ + if (!cp->cport) { + cp->cport = cport; + by_me = true; + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + } /* We do not recalc hash_key_r under lock, we assume the * parameters in cp do not change, i.e. cport is * the only possible change. 
@@ -726,21 +738,17 @@ retry: hlist_bl_del_rcu(&hn->node); hlist_bl_add_head_rcu(&hn->node, head_new); } - if (!dir) { - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; - } } - spin_unlock(&cp->lock); if (head != head2) hlist_bl_unlock(head2); hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); - if (dir--) + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + + spin_unlock_bh(&cp->lock); + if (dir-- && by_me) goto next_dir; } @@ -1835,7 +1843,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) if (!rcu_dereference_protected(ipvs->conn_tab, 1)) return; - cancel_delayed_work_sync(&ipvs->conn_resize_work); + disable_delayed_work_sync(&ipvs->conn_resize_work); if (!atomic_read(&ipvs->conn_count)) goto unreg; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f5b7a2047291..d40b404c1bf6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, { if (!t) return 1 << min_bits; - n = roundup_pow_of_two(n); + n = n > 0 ? roundup_pow_of_two(n) : 1; if (lfactor < 0) { int factor = min(-lfactor, max_bits); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6632daa87ded..c7c7f6a7a9f6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work) if (!kd) continue; /* New config ? 
Stop kthread tasks */ - if (genid != genid_done) - ip_vs_est_kthread_stop(kd); + if (genid != genid_done) { + if (!id) { + /* Only we can stop kt 0 but not under mutex */ + mutex_unlock(&ipvs->est_mutex); + ip_vs_est_kthread_stop(kd); + mutex_lock(&ipvs->est_mutex); + if (!READ_ONCE(ipvs->enable)) + goto unlock; + /* kd for kt 0 is never destroyed */ + } else { + ip_vs_est_kthread_stop(kd); + } + } if (!kd->task && !ip_vs_est_stopped(ipvs)) { + bool start; + /* Do not start kthreads above 0 in calc phase */ - if ((!id || !ipvs->est_calc_phase) && - ip_vs_est_kthread_start(ipvs, kd) < 0) + if (id) + start = !ipvs->est_calc_phase; + else + start = kd->needed; + if (start && ip_vs_est_kthread_start(ipvs, kd) < 0) repeat = true; } } @@ -1102,6 +1118,24 @@ out: return dest; } +/* Put destination in trash */ +static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest, unsigned long istart, + bool cleanup) +{ + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + refcount_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); + /* dest lives in trash with reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = istart; + spin_unlock_bh(&ipvs->dest_trash_lock); +} + static void ip_vs_dest_rcu_free(struct rcu_head *head) { struct ip_vs_dest *dest; @@ -1461,9 +1495,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ntohs(dest->vport)); ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + /* On error put back dest into the trash */ if (ret < 0) - return ret; - __ip_vs_update_dest(svc, dest, udest, 1); + ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start, + false); + else + __ip_vs_update_dest(svc, dest, udest, 1); } else { /* * Allocate and initialize the dest structure @@ 
-1533,17 +1570,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - spin_lock_bh(&ipvs->dest_trash_lock); - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - refcount_read(&dest->refcnt)); - if (list_empty(&ipvs->dest_trash) && !cleanup) - mod_timer(&ipvs->dest_trash_timer, - jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash with reference */ - list_add(&dest->t_list, &ipvs->dest_trash); - dest->idle_start = 0; - spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_trash_put_dest(ipvs, dest, 0, cleanup); /* Queue up delayed work to expire all no destination connections. * No-op when CONFIG_SYSCTL is disabled. @@ -1812,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { + mutex_lock(&ipvs->est_mutex); + /* Now there is a service - full throttle */ WRITE_ONCE(ipvs->enable, 1); + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + /* Start estimation for first time */ - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); } return 0; @@ -2032,6 +2064,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc) cancel_delayed_work_sync(&ipvs->svc_resize_work); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2078,6 +2113,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); 
call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2086,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = p; } } + /* Stop the tot_stats estimator early under service_mutex + * to avoid locking it again later. + */ + if (cleanup) + ip_vs_stop_estimator_tot_stats(ipvs); return 0; } @@ -2331,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, /* est_max_threads may depend on cpulist size */ ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); ipvs->est_calc_phase = 1; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); unlock: mutex_unlock(&ipvs->est_mutex); @@ -2351,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, mutex_lock(&ipvs->est_mutex); - if (ipvs->est_cpulist_valid) - mask = *valp; - else - mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); - ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + /* HK_TYPE_KTHREAD cpumask needs RCU protection */ + scoped_guard(rcu) { + if (ipvs->est_cpulist_valid) + mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + } mutex_unlock(&ipvs->est_mutex); @@ -2411,7 +2457,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2438,7 +2484,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2463,7 +2509,7 @@ static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; + WRITE_ONCE(*valp, val); if 
(rcu_access_pointer(ipvs->conn_tab)) mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); @@ -2490,10 +2536,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; - if (rcu_access_pointer(ipvs->svc_table)) + mutex_lock(&ipvs->service_mutex); + WRITE_ONCE(*valp, val); + /* Make sure the services are present */ + if (rcu_access_pointer(ipvs->svc_table) && + READ_ONCE(ipvs->enable) && + !test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) mod_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 0); + mutex_unlock(&ipvs->service_mutex); } } return ret; @@ -3004,7 +3056,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) int old_gen, new_gen; u32 counts[8]; u32 bucket; - int count; + u32 count; + int loops; u32 sum1; u32 sum; int i; @@ -3020,6 +3073,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) if (!atomic_read(&ipvs->conn_count)) goto after_conns; old_gen = atomic_read(&ipvs->conn_tab_changes); + loops = 0; repeat_conn: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ @@ -3032,8 +3086,11 @@ repeat_conn: resched_score++; ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; - hlist_bl_for_each_entry_rcu(hn, e, head, node) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3042,37 +3099,41 @@ repeat_conn: new_gen = atomic_read(&ipvs->conn_tab_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_conns; old_gen = new_gen; goto repeat_conn; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_conns: t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); - seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Services:\t%u\n", count); seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); + loops = 0; repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ @@ -3086,8 +3147,11 @@ repeat_svc: ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; hlist_bl_for_each_entry_rcu(svc, e, head, - s_list) + s_list) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3096,24 +3160,27 @@ repeat_svc: new_gen = atomic_read(&ipvs->svc_table_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_svc; old_gen = new_gen; goto repeat_svc; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_svc: @@ -4967,7 +5034,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + if (ipvs->tot_stats->s.est.ktid != -2) { + /* Not stopped yet? This happens only on netns init error and + * we even do not need to lock the service_mutex for this case. 
+ */ + mutex_lock(&ipvs->service_mutex); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + mutex_unlock(&ipvs->service_mutex); + } if (ipvs->est_cpulist_valid) free_cpumask_var(ipvs->sysctl_est_cpulist); @@ -5039,7 +5113,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; - if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net, ip_vs_status_show, NULL)) goto err_status; #endif diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 433ba3cab58c..ab09f5182951 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -68,6 +68,11 @@ and the limit of estimators per kthread - est_add_ktid: ktid where to add new ests, can point to empty slot where we should add kt data + - data protected by service_mutex: est_temp_list, est_add_ktid, + est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W) + - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist, + est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation, + est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0) */ static struct lock_class_key __ipvs_est_key; @@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data) } /* Schedule stop/start for kthread tasks */ -void ip_vs_est_reload_start(struct netns_ipvs *ipvs) +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart) { + lockdep_assert_held(&ipvs->est_mutex); + /* Ignore reloads before first service is added */ if (!READ_ONCE(ipvs->enable)) return; ip_vs_est_stopped_recalc(ipvs); - /* Bump the kthread configuration genid */ - atomic_inc(&ipvs->est_genid); + /* Bump the kthread configuration genid if stopping is requested */ + if (restart) + atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); } @@ -304,12 +312,17 @@ static 
int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) void *arr = NULL; int i; - if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && - READ_ONCE(ipvs->enable) && ipvs->est_max_threads) - return -EINVAL; - mutex_lock(&ipvs->est_mutex); + /* Allow kt 0 data to be created before the services are added + * and limit the kthreads when services are present. + */ + if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && + READ_ONCE(ipvs->enable) && ipvs->est_max_threads) { + ret = -EINVAL; + goto out; + } + for (i = 0; i < id; i++) { if (!ipvs->est_kt_arr[i]) break; @@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) kd->est_timer = jiffies; kd->id = id; ip_vs_est_set_params(ipvs, kd); + kd->needed = 1; /* Pre-allocate stats used in calc phase */ if (!id && !kd->calc_stats) { @@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) goto out; } - /* Start kthread tasks only when services are present */ - if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) { - ret = ip_vs_est_kthread_start(ipvs, kd); - if (ret < 0) - goto out; - } + /* Request kthread to be started */ + ip_vs_est_reload_start(ipvs, false); if (arr) ipvs->est_kt_count++; @@ -482,12 +492,11 @@ out: /* Start estimation for stats */ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { + struct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ? + ipvs->est_kt_arr[0] : NULL; struct ip_vs_estimator *est = &stats->est; int ret; - if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable)) - ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); - est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ @@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) * will not allocate much memory, just for kt 0. 
*/ ret = 0; - if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0]) + if (!kd) { ret = ip_vs_est_add_kthread(ipvs); + } else if (!kd->needed) { + mutex_lock(&ipvs->est_mutex); + /* We have job for the kt 0 task */ + kd->needed = 1; + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); + } if (ret >= 0) hlist_add_head(&est->list, &ipvs->est_temp_list); else @@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) } end_kt0: - /* kt 0 is freed after all other kthreads and chains are empty */ + /* kt 0 task is stopped after all other kt slots and chains are empty */ if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) { kd = ipvs->est_kt_arr[0]; - if (!kd || !kd->est_count) { + if (kd && !kd->est_count) { mutex_lock(&ipvs->est_mutex); - if (kd) { - ip_vs_est_kthread_destroy(kd); - ipvs->est_kt_arr[0] = NULL; - } - ipvs->est_kt_count--; + /* Keep the kt0 data but request kthread_stop */ + kd->needed = 0; + ip_vs_est_reload_start(ipvs, true); mutex_unlock(&ipvs->est_mutex); ipvs->est_add_ktid = 0; } @@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&ipvs->service_mutex); + mutex_lock(&ipvs->est_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&ipvs->service_mutex); + mutex_unlock(&ipvs->est_mutex); s = kd ? 
kd->calc_stats : NULL; if (!s) goto out; @@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&ipvs->service_mutex); - /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period */ mutex_lock(&ipvs->est_mutex); for (id = 1; id < ipvs->est_kt_count; id++) { /* netns clean up started, abort */ - if (!READ_ONCE(ipvs->enable)) - goto unlock2; + if (kthread_should_stop() || !READ_ONCE(ipvs->enable)) { + mutex_unlock(&ipvs->est_mutex); + return; + } kd = ipvs->est_kt_arr[id]; if (!kd) continue; @@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) } mutex_unlock(&ipvs->est_mutex); + mutex_lock(&ipvs->service_mutex); + /* Move all estimators to est_temp_list but carefully, * all estimators and kthread data can be released while - * we reschedule. Even for kthread 0. + * we reschedule. */ step = 0; @@ -849,9 +865,7 @@ walk_chain: ip_vs_stop_estimator(ipvs, stats); /* Tasks are stopped, move without RCU grace period */ est->ktid = -1; - est->ktrow = row - kd->est_row; - if (est->ktrow < 0) - est->ktrow += IPVS_EST_NTICKS; + est->ktrow = delay; hlist_add_head(&est->list, &ipvs->est_temp_list); /* kd freed ? */ if (last) @@ -889,7 +903,6 @@ end_dequeue: if (genid == atomic_read(&ipvs->est_genid)) ipvs->est_calc_phase = 0; -unlock2: mutex_unlock(&ipvs->est_mutex); unlock: diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 0fb5162992e5..ce542ed4b013 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -102,6 +102,18 @@ __ip_vs_dst_check(struct ip_vs_dest *dest) return dest_dst; } +/* Based on ip_exceeds_mtu(). 
*/ +static bool ip_vs_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) + return false; + + return true; +} + static inline bool __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) { @@ -111,10 +123,9 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) */ if (IP6CB(skb)->frag_max_size > mtu) return true; /* largest fragment violate MTU */ - } - else if (skb->len > mtu && !skb_is_gso(skb)) { + } else if (ip_vs_exceeds_mtu(skb, mtu)) return true; /* Packet size violate MTU size */ - } + return false; } @@ -232,7 +243,7 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len > mtu && !skb_is_gso(skb) && + ip_vs_exceeds_mtu(skb, mtu) && !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 4f39bf7c843f..75e53fde6b29 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -72,6 +72,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b08189226320..8ba5b22a1eef 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1811,14 +1811,17 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl)); if (exp) { + struct nf_conntrack_helper *assign_helper; + /* Welcome, Mr. Bond. 
We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ ct->master = exp->master; - if (exp->helper) { + assign_helper = rcu_dereference(exp->assign_helper); + if (assign_helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - rcu_assign_pointer(help->helper, exp->helper); + rcu_assign_pointer(help->helper, assign_helper); } #ifdef CONFIG_NF_CONNTRACK_MARK diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 24d0576d84b7..8e943efbdf0a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -344,6 +344,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3f5c50455b71..b2fe6554b9cf 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); + rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if 
(memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a715304a53d8..b594cd244fe1 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -400,6 +400,11 @@ static bool expect_iter_me(struct nf_conntrack_expect *exp, void 
*data) this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); + if (this == me) + return true; + + this = rcu_dereference_protected(exp->assign_helper, + lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eda5fe4a75c8..befa7e83ee49 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2634,6 +2634,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, + const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2860,6 +2861,7 @@ static int ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report) { + struct nf_conntrack_helper *assign_helper = NULL; struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; struct nf_conntrack_expect *exp; @@ -2870,13 +2872,26 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; + if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK]) + return -EINVAL; + err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda, ct, &tuple, &mask); if (err < 0) return err; + if (cda[CTA_EXPECT_HELP_NAME]) { + const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); + + assign_helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + tuple.dst.protonum); + if (!assign_helper) + return -EOPNOTSUPP; + } + exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - &tuple, &mask); + assign_helper, &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3515,6 +3530,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, + 
const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { @@ -3568,6 +3584,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->zone = ct->zone; #endif rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, assign_helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; @@ -3623,7 +3640,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, NULL, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 645d2c43ebf7..7e10fa65cbdd 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, if (!ih) goto out_unlock; - if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) - ct->proto.sctp.init[!dir] = 0; - ct->proto.sctp.init[dir] = 1; + /* Do not record INIT matching peer vtag (stale or retransmitted INIT). */ + if (old_state == SCTP_CONNTRACK_NONE || + ct->proto.sctp.vtag[!dir] != ih->init_tag) { + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + } pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 182cfb119448..e69941f1a101 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -181,6 +181,57 @@ static int sip_parse_addr(const struct nf_conn *ct, const char *cp, return 1; } +/* Parse optional port number after IP address. + * Returns false on malformed input, true otherwise. 
+ * If port is non-NULL, stores parsed port in network byte order. + * If no port is present, sets *port to default SIP port. + */ +static bool sip_parse_port(const char *dptr, const char **endp, + const char *limit, __be16 *port) +{ + unsigned int p = 0; + int len = 0; + + if (dptr >= limit) + return false; + + if (*dptr != ':') { + if (port) + *port = htons(SIP_PORT); + if (endp) + *endp = dptr; + return true; + } + + dptr++; /* skip ':' */ + + while (dptr < limit && isdigit(*dptr)) { + p = p * 10 + (*dptr - '0'); + dptr++; + len++; + if (len > 5) /* max "65535" */ + return false; + } + + if (len == 0) + return false; + + /* reached limit while parsing port */ + if (dptr >= limit) + return false; + + if (p < 1024 || p > 65535) + return false; + + if (port) + *port = htons(p); + + if (endp) + *endp = dptr; + + return true; +} + /* skip ip address. returns its length. */ static int epaddr_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -193,11 +244,8 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, return 0; } - /* Port number */ - if (*dptr == ':') { - dptr++; - dptr += digits_len(ct, dptr, limit, shift); - } + if (!sip_parse_port(dptr, &dptr, limit, NULL)) + return 0; return dptr - aux; } @@ -228,6 +276,51 @@ static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, return epaddr_len(ct, dptr, limit, shift); } +/* simple_strtoul stops after first non-number character. + * But as we're not dealing with c-strings, we can't rely on + * hitting \r,\n,\0 etc. before moving past end of buffer. + * + * This is a variant of simple_strtoul, but doesn't require + * a c-string. + * + * If value exceeds UINT_MAX, 0 is returned. 
+ */ +static unsigned int sip_strtouint(const char *cp, unsigned int len, char **endp) +{ + const unsigned int max = sizeof("4294967295"); + unsigned int olen = len; + const char *s = cp; + u64 result = 0; + + if (len > max) + len = max; + + while (olen > 0 && isdigit(*s)) { + unsigned int value; + + if (len == 0) + goto err; + + value = *s - '0'; + result = result * 10 + value; + + if (result > UINT_MAX) + goto err; + s++; + len--; + olen--; + } + + if (endp) + *endp = (char *)s; + + return result; +err: + if (endp) + *endp = (char *)cp; + return 0; +} + /* Parse a SIP request line of the form: * * Request-Line = Method SP Request-URI SP SIP-Version CRLF @@ -241,7 +334,6 @@ int ct_sip_parse_request(const struct nf_conn *ct, { const char *start = dptr, *limit = dptr + datalen, *end; unsigned int mlen; - unsigned int p; int shift = 0; /* Skip method and following whitespace */ @@ -267,14 +359,8 @@ int ct_sip_parse_request(const struct nf_conn *ct, if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; - if (end < limit && *end == ':') { - end++; - p = simple_strtoul(end, (char **)&end, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(end, &end, limit, port)) + return -1; if (end == dptr) return 0; @@ -509,7 +595,6 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, union nf_inet_addr *addr, __be16 *port) { const char *c, *limit = dptr + datalen; - unsigned int p; int ret; ret = ct_sip_walk_headers(ct, dptr, dataoff ? 
*dataoff : 0, datalen, @@ -520,14 +605,8 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; - if (*c == ':') { - c++; - p = simple_strtoul(c, (char **)&c, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(c, &c, limit, port)) + return -1; if (dataoff) *dataoff = c - dptr; @@ -609,7 +688,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - *val = simple_strtoul(start, &end, 0); + *val = sip_strtouint(start, limit - start, (char **)&end); if (start == end) return -1; if (matchoff && matchlen) { @@ -1064,6 +1143,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { + char *end; + if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen, SDP_HDR_MEDIA, SDP_HDR_UNSPEC, &mediaoff, &medialen) <= 0) @@ -1079,8 +1160,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff += t->len; medialen -= t->len; - port = simple_strtoul(*dptr + mediaoff, NULL, 10); - if (port == 0) + port = sip_strtouint(*dptr + mediaoff, *datalen - mediaoff, (char **)&end); + if (port == 0 || *dptr + mediaoff == end) continue; if (port < 1024 || port > 65535) { nf_ct_helper_log(skb, ct, "wrong port %u", port); @@ -1254,7 +1335,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, */ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, @@ -1285,6 +1366,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, goto store_cseq; } + helper = 
rcu_dereference(nfct_help(ct)->helper); + if (!helper) + return NF_DROP; + exp = nf_ct_expect_alloc(ct); if (!exp) { nf_ct_helper_log(skb, ct, "cannot alloc expectation"); @@ -1295,14 +1380,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, if (sip_direct_signalling) saddr = &ct->tuplehash[!dir].tuple.src.u3; - helper = rcu_dereference(nfct_help(ct)->helper); - if (!helper) - return NF_DROP; - nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1358,7 +1439,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); while (1) { unsigned int c_expires = expires; @@ -1418,10 +1499,12 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen, matchend; unsigned int code, cseq, i; + char *end; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); + code = sip_strtouint(*dptr + strlen("SIP/2.0 "), + *datalen - strlen("SIP/2.0 "), NULL); if (!code) { nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; @@ -1432,8 +1515,8 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr 
+ matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1482,6 +1565,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; + char *end; handler = &sip_handlers[i]; if (handler->request == NULL) @@ -1498,8 +1582,8 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr + matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1575,7 +1659,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen) <= 0) break; - clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + clen = sip_strtouint(dptr + matchoff, datalen - matchoff, (char **)&end); if (dptr + matchoff == end) break; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index e348fb90b8dc..3b0a70e154cd 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -13,22 +13,6 @@ #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> -#define NF_RECURSION_LIMIT 2 - -#ifndef CONFIG_PREEMPT_RT -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); -} -#else - -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return &current->net_xmit.nf_dup_skb_recursion; -} - -#endif - static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev, enum nf_dev_hooks hook) { diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2c4140e6f53c..785d8c244a77 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -122,6 +122,7 @@ static int
flow_offload_fill_route(struct flow_offload *flow, flow_tuple->tun = route->tuple[dir].in.tun; flow_tuple->encap_num = route->tuple[dir].in.num_encaps; + flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment; flow_tuple->tun_num = route->tuple[dir].in.num_tuns; switch (route->tuple[dir].xmit_type) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index fd56d663cb5b..9c05a50d6013 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -445,13 +445,13 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, switch (skb->protocol) { case htons(ETH_P_8021Q): vlan_hdr = (struct vlan_hdr *)skb->data; - __skb_pull(skb, VLAN_HLEN); + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vlan_hdr); skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): skb->protocol = __nf_flow_pppoe_proto(skb); - skb_pull(skb, PPPOE_SES_HLEN); + skb_pull_rcsum(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; } @@ -462,23 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, nf_flow_ip_tunnel_pop(ctx, skb); } -struct nf_flow_xmit { - const void *dest; - const void *source; - struct net_device *outdev; -}; - -static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, - struct nf_flow_xmit *xmit) -{ - skb->dev = xmit->outdev; - dev_hard_header(skb, skb->dev, ntohs(skb->protocol), - xmit->dest, xmit->source, skb->len); - dev_queue_xmit(skb); - - return NF_STOLEN; -} - static struct flow_offload_tuple_rhash * nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) @@ -524,7 +507,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -544,7 +527,34 @@ static int nf_flow_offload_forward(struct 
nf_flowtable_ctx *ctx, return 1; } -static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) +/* Similar to skb_vlan_push. */ +static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id, + u32 needed_headroom) +{ + if (skb_vlan_tag_present(skb)) { + struct vlan_hdr *vhdr; + + if (skb_cow_head(skb, needed_headroom + VLAN_HLEN)) + return -1; + + __skb_push(skb, VLAN_HLEN); + if (skb_mac_header_was_set(skb)) + skb->mac_header -= VLAN_HLEN; + + vhdr = (struct vlan_hdr *)skb->data; + skb->network_header -= VLAN_HLEN; + vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + vhdr->h_vlan_encapsulated_proto = skb->protocol; + skb->protocol = skb->vlan_proto; + skb_postpush_rcsum(skb, skb->data, VLAN_HLEN); + } + __vlan_hwaccel_put_tag(skb, proto, id); + + return 0; +} + +static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id, + u32 needed_headroom) { int data_len = skb->len + sizeof(__be16); struct ppp_hdr { @@ -553,7 +563,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) } *ph; __be16 proto; - if (skb_cow_head(skb, PPPOE_SES_HLEN)) + if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN)) return -1; switch (skb->protocol) { @@ -730,21 +740,24 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb, } static int nf_flow_encap_push(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct net_device *outdev) { + u32 needed_headroom = LL_RESERVED_SPACE(outdev); int i; - for (i = 0; i < tuple->encap_num; i++) { + for (i = tuple->encap_num - 1; i >= 0; i--) { switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): - skb_reset_mac_header(skb); - if (skb_vlan_push(skb, tuple->encap[i].proto, - tuple->encap[i].id) < 0) + if (nf_flow_vlan_push(skb, tuple->encap[i].proto, + tuple->encap[i].id, + needed_headroom) < 0) return -1; break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0) + if (nf_flow_pppoe_push(skb, tuple->encap[i].id, + 
needed_headroom) < 0) return -1; break; } @@ -753,6 +766,76 @@ static int nf_flow_encap_push(struct sk_buff *skb, return 0; } +struct nf_flow_xmit { + const void *dest; + const void *source; + struct net_device *outdev; + struct flow_offload_tuple *tuple; + bool needs_gso_segment; +}; + +static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct net_device *dev = xmit->outdev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) + return; + } + + skb->dev = dev; + dev_hard_header(skb, dev, ntohs(skb->protocol), + xmit->dest, xmit->source, skb->len); + dev_queue_xmit(skb); +} + +static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct sk_buff *segs, *nskb; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) + return NF_DROP; + + if (segs) + consume_skb(skb); + else + segs = skb; + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + + if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) { + kfree_skb(segs); + kfree_skb_list(nskb); + return NF_STOLEN; + } + __nf_flow_queue_xmit(net, segs, xmit); + } + + return NF_STOLEN; +} + +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + if (xmit->tuple->encap_num) { + if (skb_is_gso(skb) && xmit->needs_gso_segment) + return nf_flow_encap_gso_xmit(net, skb, xmit); + + if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0) + return NF_DROP; + } + + __nf_flow_queue_xmit(net, skb, xmit); + + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -797,9 +880,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, 
other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rtable(tuplehash->tuple.dst_cache); @@ -829,6 +909,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } @@ -1037,7 +1119,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -1119,9 +1201,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, &ip6_daddr, encap_limit) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rt6_info(tuplehash->tuple.dst_cache); @@ -1151,6 +1230,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index 6bb9579dcc2a..9e88ea6a2eef 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c @@ -86,6 +86,7 @@ struct nft_forward_info { u8 ingress_vlans; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + bool needs_gso_segment; enum flow_offload_xmit_type xmit_type; }; @@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, path->encap.proto; info->num_encaps++; } - if (path->type == DEV_PATH_PPPOE) + if (path->type == DEV_PATH_PPPOE) { memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; + info->needs_gso_segment = 1; + } break; case 
DEV_PATH_BRIDGE: if (is_zero_ether_addr(info->h_source)) @@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); route->tuple[dir].xmit_type = info.xmit_type; } + route->tuple[dir].out.needs_gso_segment = info.needs_gso_segment; } int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 98deef6cde69..8f1054920a85 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -50,7 +50,7 @@ static unsigned int help(struct sk_buff *skb, return NF_DROP; } - sprintf(buffer, "%u", port); + snprintf(buffer, sizeof(buffer), "%u", port); if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer))) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 83b2b5e9759a..74ec224ce0d6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1222,9 +1222,11 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, ret = nf_register_net_hooks(net, nat_ops, ops_count); if (ret < 0) { mutex_unlock(&nf_nat_proto_mutex); - for (i = 0; i < ops_count; i++) - kfree(nat_ops[i].priv); - kfree(nat_ops); + for (i = 0; i < ops_count; i++) { + priv = nat_ops[i].priv; + kfree_rcu(priv, rcu_head); + } + kfree_rcu(nat_ops, rcu); return ret; } @@ -1288,7 +1290,7 @@ void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, } nat_proto_net->nat_hook_ops = NULL; - kfree(nat_ops); + kfree_rcu(nat_ops, rcu); } unlock: mutex_unlock(&nf_nat_proto_mutex); diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index cf4aeb299bde..9fbfc6bff0c2 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -68,25 +68,27 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, } static int sip_sprintf_addr(const struct 
nf_conn *ct, char *buffer, + size_t size, const union nf_inet_addr *addr, bool delim) { if (nf_ct_l3num(ct) == NFPROTO_IPV4) - return sprintf(buffer, "%pI4", &addr->ip); + return scnprintf(buffer, size, "%pI4", &addr->ip); else { if (delim) - return sprintf(buffer, "[%pI6c]", &addr->ip6); + return scnprintf(buffer, size, "[%pI6c]", &addr->ip6); else - return sprintf(buffer, "%pI6c", &addr->ip6); + return scnprintf(buffer, size, "%pI6c", &addr->ip6); } } static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer, + size_t size, const union nf_inet_addr *addr, u16 port) { if (nf_ct_l3num(ct) == NFPROTO_IPV4) - return sprintf(buffer, "%pI4:%u", &addr->ip, port); + return scnprintf(buffer, size, "%pI4:%u", &addr->ip, port); else - return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port); + return scnprintf(buffer, size, "[%pI6c]:%u", &addr->ip6, port); } static int map_addr(struct sk_buff *skb, unsigned int protoff, @@ -119,7 +121,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, if (nf_inet_addr_cmp(&newaddr, addr) && newport == port) return 1; - buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport)); + buflen = sip_sprintf_addr_port(ct, buffer, sizeof(buffer), &newaddr, ntohs(newport)); return mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } @@ -212,7 +214,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &addr, true) > 0 && nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) && !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) { - buflen = sip_sprintf_addr(ct, buffer, + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), &ct->tuplehash[!dir].tuple.dst.u3, true); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, @@ -229,7 +231,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &addr, false) > 0 && nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) && !nf_inet_addr_cmp(&addr, 
&ct->tuplehash[!dir].tuple.src.u3)) { - buflen = sip_sprintf_addr(ct, buffer, + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), &ct->tuplehash[!dir].tuple.src.u3, false); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, @@ -244,10 +246,11 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, "rport=", &poff, &plen, &n) > 0 && + n >= 1024 && n <= 65535 && htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; - buflen = sprintf(buffer, "%u", ntohs(p)); + buflen = scnprintf(buffer, sizeof(buffer), "%u", ntohs(p)); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) { nf_ct_helper_log(skb, ct, "cannot mangle rport"); @@ -418,7 +421,8 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { - buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); + buflen = sip_sprintf_addr_port(ct, buffer, sizeof(buffer), + &newaddr, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) { nf_ct_helper_log(skb, ct, "cannot mangle packet"); @@ -438,8 +442,8 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + char buffer[sizeof("4294967295")]; unsigned int matchoff, matchlen; - char buffer[sizeof("65536")]; int buflen, c_len; /* Get actual SDP length */ @@ -454,7 +458,7 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen) <= 0) return 0; - buflen = sprintf(buffer, "%u", c_len); + buflen = scnprintf(buffer, sizeof(buffer), "%u", c_len); return mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, 
matchlen, buffer, buflen); } @@ -491,7 +495,7 @@ static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff, char buffer[INET6_ADDRSTRLEN]; unsigned int buflen; - buflen = sip_sprintf_addr(ct, buffer, addr, false); + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), addr, false); if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, type, term, buffer, buflen)) return 0; @@ -509,7 +513,7 @@ static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff, char buffer[sizeof("nnnnn")]; unsigned int buflen; - buflen = sprintf(buffer, "%u", port); + buflen = scnprintf(buffer, sizeof(buffer), "%u", port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) return 0; @@ -529,7 +533,7 @@ static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff unsigned int buflen; /* Mangle session description owner and contact addresses */ - buflen = sip_sprintf_addr(ct, buffer, addr, false); + buflen = sip_sprintf_addr(ct, buffer, sizeof(buffer), addr, false); if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen)) return 0; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8537b94653d3..87387adbca65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -374,7 +374,40 @@ static void nft_netdev_hook_free_rcu(struct nft_hook *hook) call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu); } +static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook) +{ + list_del_rcu(&hook->list); + nft_netdev_hook_free_rcu(hook); +} + +static void nft_trans_hook_destroy(struct nft_trans_hook *trans_hook) +{ + list_del(&trans_hook->list); + kfree(trans_hook); +} + +static void nft_netdev_unregister_trans_hook(struct net *net, + const struct nft_table *table, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct 
nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); + } +} + static void nft_netdev_unregister_hooks(struct net *net, + const struct nft_table *table, struct list_head *hook_list, bool release_netdev) { @@ -382,12 +415,12 @@ static void nft_netdev_unregister_hooks(struct net *net, struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { - list_for_each_entry(ops, &hook->ops_list, list) - nf_unregister_net_hook(net, ops); - if (release_netdev) { - list_del(&hook->list); - nft_netdev_hook_free_rcu(hook); + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -422,20 +455,25 @@ static void __nf_tables_unregister_hook(struct net *net, struct nft_base_chain *basechain; const struct nf_hook_ops *ops; - if (table->flags & NFT_TABLE_F_DORMANT || - !nft_is_base_chain(chain)) + if (!nft_is_base_chain(chain)) return; basechain = nft_base_chain(chain); ops = &basechain->ops; + /* must also be called for dormant tables */ + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { + nft_netdev_unregister_hooks(net, table, &basechain->hook_list, + release_netdev); + return; + } + + if (table->flags & NFT_TABLE_F_DORMANT) + return; + if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list, - release_netdev); - else - nf_unregister_net_hook(net, &basechain->ops); + nf_unregister_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -1942,15 
+1980,69 @@ static int nft_nla_put_hook_dev(struct sk_buff *skb, struct nft_hook *hook) return nla_put_string(skb, attr, hook->ifname); } +struct nft_hook_dump_ctx { + struct nft_hook *first; + int n; +}; + +static int nft_dump_basechain_hook_one(struct sk_buff *skb, + struct nft_hook *hook, + struct nft_hook_dump_ctx *dump_ctx) +{ + if (!dump_ctx->first) + dump_ctx->first = hook; + + if (nft_nla_put_hook_dev(skb, hook)) + return -1; + + dump_ctx->n++; + + return 0; +} + +static int nft_dump_basechain_hook_list(struct sk_buff *skb, + const struct net *net, + const struct list_head *hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_hook *hook; + int err; + + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + err = nft_dump_basechain_hook_one(skb, hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + +static int nft_dump_basechain_trans_hook_list(struct sk_buff *skb, + const struct list_head *trans_hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_trans_hook *trans_hook; + int err; + + list_for_each_entry(trans_hook, trans_hook_list, list) { + err = nft_dump_basechain_hook_one(skb, trans_hook->hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { const struct nf_hook_ops *ops = &basechain->ops; - struct nft_hook *hook, *first = NULL; + struct nft_hook_dump_ctx dump_hook_ctx = {}; struct nlattr *nest, *nest_devs; - int n = 0; nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) @@ -1965,23 +2057,23 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &basechain->hook_list; - 
list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (!first) - first = hook; - - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; - n++; + if (hook_list && + nft_dump_basechain_hook_list(skb, net, hook_list, &dump_hook_ctx)) { + goto nla_put_failure; + } else if (trans_hook_list && + nft_dump_basechain_trans_hook_list(skb, trans_hook_list, + &dump_hook_ctx)) { + goto nla_put_failure; } + nla_nest_end(skb, nest_devs); - if (n == 1 && - !hook_is_prefix(first) && - nla_put_string(skb, NFTA_HOOK_DEV, first->ifname)) + if (dump_hook_ctx.n == 1 && + !hook_is_prefix(dump_hook_ctx.first) && + nla_put_string(skb, NFTA_HOOK_DEV, dump_hook_ctx.first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -1995,7 +2087,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { struct nlmsghdr *nlh; @@ -2011,7 +2104,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELCHAIN || event == NFT_MSG_DESTROYCHAIN)) { nlmsg_end(skb, nlh); @@ -2022,7 +2115,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; - if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list)) + if (nft_dump_basechain_hook(skb, net, family, basechain, + hook_list, trans_hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -2058,7 +2152,8 @@ nla_put_failure: } static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, - const struct list_head *hook_list) + const struct list_head *hook_list, 
+ const struct list_head *trans_hook_list) { struct nftables_pernet *nft_net; struct sk_buff *skb; @@ -2078,7 +2173,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, - ctx->chain, hook_list); + ctx->chain, hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -2124,7 +2219,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NFT_MSG_NEWCHAIN, NLM_F_MULTI, table->family, table, - chain, NULL) < 0) + chain, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -2178,7 +2273,7 @@ static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, - 0, family, table, chain, NULL); + 0, family, table, chain, NULL, NULL); if (err < 0) goto err_fill_chain_info; @@ -2271,10 +2366,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain) if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, - &basechain->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + &basechain->hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); } module_put(basechain->type->owner); if (rcu_access_pointer(basechain->stats)) { @@ -2343,8 +2436,12 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, list_for_each_entry(hook, hook_list, list) { if (!strncmp(hook->ifname, this->ifname, - min(hook->ifnamelen, this->ifnamelen))) + min(hook->ifnamelen, this->ifnamelen))) { + if (hook->flags & NFT_HOOK_REMOVE) + continue; + return hook; + } } return NULL; @@ -2974,6 +3071,7 @@ err_hooks: list_for_each_entry(ops, &h->ops_list, list) nf_unregister_net_hook(ctx->net, ops); } + /* hook.list is on stack, no need for list_del_rcu() */ list_del(&h->list); nft_netdev_hook_free_rcu(h); } @@ -3102,6 
+3200,32 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, policy, flags, extack); } +static int nft_trans_delhook(struct nft_hook *hook, + struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook; + + trans_hook = kmalloc_obj(*trans_hook, GFP_KERNEL); + if (!trans_hook) + return -ENOMEM; + + trans_hook->hook = hook; + list_add_tail(&trans_hook->list, del_list); + hook->flags |= NFT_HOOK_REMOVE; + + return 0; +} + +static void nft_trans_delhook_abort(struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook, *next; + + list_for_each_entry_safe(trans_hook, next, del_list, list) { + trans_hook->hook->flags &= ~NFT_HOOK_REMOVE; + nft_trans_hook_destroy(trans_hook); + } +} + static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) @@ -3128,7 +3252,10 @@ static int nft_delchain_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_chain_del_hook; } - list_move(&hook->list, &chain_del_list); + if (nft_trans_delhook(hook, &chain_del_list) < 0) { + err = -ENOMEM; + goto err_chain_del_hook; + } } trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); @@ -3148,7 +3275,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, return 0; err_chain_del_hook: - list_splice(&chain_del_list, &basechain->hook_list); + nft_trans_delhook_abort(&chain_del_list); nft_chain_release_hook(&chain_hook); return err; @@ -4086,6 +4213,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) struct nft_chain *chain; struct nft_ctx ctx = { .net = net, + .table = (struct nft_table *)table, .family = table->family, }; int err = 0; @@ -8852,10 +8980,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nft_unregister_flowtable_ops(net, flowtable, ops); - if (release_netdev) { - list_del(&hook->list); - 
nft_netdev_hook_free_rcu(hook); - } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -8926,8 +9052,7 @@ err_unregister_net_hooks: nft_unregister_flowtable_ops(net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -8937,9 +9062,25 @@ static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; - list_for_each_entry_safe(hook, next, hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + list_for_each_entry_safe(hook, next, hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); +} + +static void nft_flowtable_unregister_trans_hook(struct net *net, + struct nft_flowtable *flowtable, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + list_for_each_entry(ops, &hook->ops_list, list) + nft_unregister_flowtable_ops(net, flowtable, ops); + + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); } } @@ -9028,8 +9169,7 @@ err_flowtable_update_hook: nft_unregister_flowtable_ops(ctx->net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -9202,7 +9342,10 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_flowtable_del_hook; } - list_move(&hook->list, &flowtable_del_list); + if (nft_trans_delhook(hook, &flowtable_del_list) < 0) { + err = -ENOMEM; + goto err_flowtable_del_hook; + } } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, @@ -9223,7 +9366,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, return 0; err_flowtable_del_hook: - list_splice(&flowtable_del_list, &flowtable->hook_list); + nft_trans_delhook_abort(&flowtable_del_list); nft_flowtable_hook_release(&flowtable_hook); return 
err; @@ -9288,8 +9431,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, struct nft_flowtable *flowtable, - struct list_head *hook_list) + struct list_head *hook_list, + struct list_head *trans_hook_list) { + struct nft_trans_hook *trans_hook; struct nlattr *nest, *nest_devs; struct nft_hook *hook; struct nlmsghdr *nlh; @@ -9306,7 +9451,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELFLOWTABLE || event == NFT_MSG_DESTROYFLOWTABLE)) { nlmsg_end(skb, nlh); @@ -9328,13 +9473,20 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &flowtable->hook_list; - list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; + if (hook_list) { + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + if (nft_nla_put_hook_dev(skb, hook)) + goto nla_put_failure; + } + } else if (trans_hook_list) { + list_for_each_entry(trans_hook, trans_hook_list, list) { + if (nft_nla_put_hook_dev(skb, trans_hook->hook)) + goto nla_put_failure; + } } nla_nest_end(skb, nest_devs); nla_nest_end(skb, nest); @@ -9388,7 +9540,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, NFT_MSG_NEWFLOWTABLE, NLM_F_MULTI | NLM_F_APPEND, table->family, - flowtable, NULL) < 0) + flowtable, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -9488,7 +9640,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, 0, family, - flowtable, NULL); + flowtable, NULL, NULL); if 
(err < 0) goto err_fill_flowtable_info; @@ -9501,7 +9653,9 @@ err_fill_flowtable_info: static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct nft_flowtable *flowtable, - struct list_head *hook_list, int event) + struct list_head *hook_list, + struct list_head *trans_hook_list, + int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; @@ -9521,7 +9675,8 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, - ctx->family, flowtable, hook_list); + ctx->family, flowtable, + hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -9535,13 +9690,8 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { - struct nft_hook *hook, *next; - flowtable->data.type->free(&flowtable->data); - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + nft_hooks_destroy(&flowtable->hook_list); kfree(flowtable->name); module_put(flowtable->data.type->owner); kfree(flowtable); @@ -10060,9 +10210,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - if (nft_trans_chain_update(trans)) - nft_hooks_destroy(&nft_trans_chain_hooks(trans)); - else + if (!nft_trans_chain_update(trans)) nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: @@ -10083,9 +10231,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: - if (nft_trans_flowtable_update(trans)) - nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); - else + if (!nft_trans_flowtable_update(trans)) nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; } @@ -10845,31 +10991,28 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { 
nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, - &nft_trans_chain_hooks(trans)); - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + &nft_trans_chain_hooks(trans), NULL); + list_splice_rcu(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); - nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL, &nft_trans_chain_hooks(trans)); - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_trans_hook(net, table, + &nft_trans_chain_hooks(trans)); } else { nft_chain_del(nft_trans_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, - NULL); + NULL, NULL); nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } @@ -10975,14 +11118,16 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), + NULL, NFT_MSG_NEWFLOWTABLE); - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + list_splice_rcu(&nft_trans_flowtable_hooks(trans), + &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, NFT_MSG_NEWFLOWTABLE); } nft_trans_destroy(trans); @@ -10992,16 +11137,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if 
(nft_trans_flowtable_update(trans)) { nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), + NULL, &nft_trans_flowtable_hooks(trans), trans->msg_type); - nft_unregister_flowtable_net_hooks(net, - nft_trans_flowtable(trans), - &nft_trans_flowtable_hooks(trans)); + nft_flowtable_unregister_trans_hook(net, + nft_trans_flowtable(trans), + &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), @@ -11143,11 +11290,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_hooks(net, table, + &nft_trans_chain_hooks(trans), + true); free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); @@ -11165,8 +11310,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_chain_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_chain(trans)); @@ -11280,8 +11424,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_flowtable_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_flowtable(trans)); diff --git 
a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 5ddd5b6e135f..8ab186f86dd4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -153,7 +153,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb) + pkt->nhoff; else { - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return false; ptr = skb->data + nft_thoff(pkt); } diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index d64ce21c7b55..acb753ec5697 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -31,26 +31,18 @@ EXPORT_SYMBOL_GPL(nf_osf_fingers); static inline int nf_osf_ttl(const struct sk_buff *skb, int ttl_check, unsigned char f_ttl) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); const struct iphdr *ip = ip_hdr(skb); - const struct in_ifaddr *ifa; - int ret = 0; - if (ttl_check == NF_OSF_TTL_TRUE) + switch (ttl_check) { + case NF_OSF_TTL_TRUE: return ip->ttl == f_ttl; - if (ttl_check == NF_OSF_TTL_NOCHECK) - return 1; - else if (ip->ttl <= f_ttl) + break; + case NF_OSF_TTL_NOCHECK: return 1; - - in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (inet_ifa_match(ip->saddr, ifa)) { - ret = (ip->ttl == f_ttl); - break; - } + case NF_OSF_TTL_LESS: + default: + return ip->ttl <= f_ttl; } - - return ret; } struct nf_osf_hdr_ctx { @@ -64,9 +56,9 @@ struct nf_osf_hdr_ctx { static bool nf_osf_match_one(const struct sk_buff *skb, const struct nf_osf_user_finger *f, int ttl_check, - struct nf_osf_hdr_ctx *ctx) + const struct nf_osf_hdr_ctx *ctx) { - const __u8 *optpinit = ctx->optp; + const __u8 *optp = ctx->optp; unsigned int check_WSS = 0; int fmatch = FMATCH_WRONG; int foptsize, optnum; @@ -95,17 +87,17 @@ static bool nf_osf_match_one(const struct sk_buff *skb, check_WSS = f->wss.wc; for (optnum = 0; optnum < f->opt_num; ++optnum) { - if (f->opt[optnum].kind == 
*ctx->optp) { + if (f->opt[optnum].kind == *optp) { __u32 len = f->opt[optnum].length; - const __u8 *optend = ctx->optp + len; + const __u8 *optend = optp + len; fmatch = FMATCH_OK; - switch (*ctx->optp) { + switch (*optp) { case OSFOPT_MSS: - mss = ctx->optp[3]; + mss = optp[3]; mss <<= 8; - mss |= ctx->optp[2]; + mss |= optp[2]; mss = ntohs((__force __be16)mss); break; @@ -113,7 +105,7 @@ static bool nf_osf_match_one(const struct sk_buff *skb, break; } - ctx->optp = optend; + optp = optend; } else fmatch = FMATCH_OPT_WRONG; @@ -156,9 +148,6 @@ static bool nf_osf_match_one(const struct sk_buff *skb, } } - if (fmatch != FMATCH_OK) - ctx->optp = optpinit; - return fmatch == FMATCH_OK; } @@ -320,6 +309,10 @@ static int nfnl_osf_add_callback(struct sk_buff *skb, if (f->opt_num > ARRAY_SIZE(f->opt)) return -EINVAL; + if (f->wss.wc >= OSF_WSS_MAX || + (f->wss.wc == OSF_WSS_MODULO && f->wss.val == 0)) + return -EINVAL; + for (i = 0; i < f->opt_num; i++) { if (!f->opt[i].length || f->opt[i].length > MAX_IPOPTLEN) return -EINVAL; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 13808e9cd999..94dccdcfa06b 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -196,7 +196,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (err < 0) return err; - if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + if (!priv->data.data[0] || + priv->data.data[0] >= BITS_PER_TYPE(u32)) { nft_data_release(&priv->data, desc.type); return -EINVAL; } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index decc725a33c2..0caa9304d2d0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -261,10 +261,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + ret = 
xt_check_target(&par, size, proto, inv); if (ret < 0) { if (ret == -ENOENT) { @@ -353,8 +353,6 @@ nla_put_failure: static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_target *target = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -377,11 +375,21 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgchk_param par; + union nft_entry e = {}; - hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; + nft_target_set_tgchk_param(&par, ctx, target, info, &e, 0, false); + + ret = xt_check_hooks_target(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; @@ -515,10 +523,10 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + return xt_check_match(&par, size, proto, inv); } @@ -614,8 +622,6 @@ static int nft_match_large_dump(struct sk_buff *skb, static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_match *match = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -638,11 +644,30 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_match *match = expr->ops->data; + size_t size = XT_ALIGN(match->matchsize); + struct 
xt_mtchk_param par; + union nft_entry e = {}; + void *info; - hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; + if (NFT_EXPR_SIZE(size) > NFT_MATCH_LARGE_THRESH) { + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + info = priv->info; + } else { + info = nft_expr_priv(expr); + } + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, 0, false); + + ret = xt_check_hooks_match(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 60ee8d932fcb..fa2cc556331c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1334,6 +1334,8 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj, if (nf_ct_expect_related(exp, 0) != 0) regs->verdict.code = NF_DROP; + + nf_ct_expect_put(exp); } static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 0407d6f708ae..e6a07c0df207 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -376,7 +376,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; - if (pkt->tprot != IPPROTO_SCTP) + if (pkt->tprot != IPPROTO_SCTP || pkt->fragoff) goto err; do { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 4bce36c3a6a0..b9e88d7cf308 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -95,12 +95,15 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion(); struct nft_fwd_neigh *priv = nft_expr_priv(expr); void *addr = ®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; unsigned int verdict = NF_STOLEN; struct sk_buff *skb = pkt->skb; + int nhoff = 
skb_network_offset(skb); struct net_device *dev; + unsigned int hh_len; int neigh_table; switch (priv->nfproto) { @@ -111,7 +114,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*iph))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*iph))) { verdict = NF_DROP; goto out; } @@ -132,7 +135,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*ip6h))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*ip6h))) { verdict = NF_DROP; goto out; } @@ -151,13 +154,31 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } + if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) { + verdict = NF_DROP; + goto out; + } + dev = dev_get_by_index_rcu(nft_net(pkt), oif); - if (dev == NULL) - return; + if (dev == NULL) { + verdict = NF_DROP; + goto out; + } + + hh_len = LL_RESERVED_SPACE(dev); + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) { + verdict = NF_STOLEN; + goto out; + } + } skb->dev = dev; skb_clear_tstamp(skb); + (*nf_dup_skb_recursion)++; neigh_xmit(neigh_table, dev, addr, skb); + (*nf_dup_skb_recursion)--; out: regs->verdict.code = verdict; } diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 18003433476c..45fe56da5044 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -28,7 +28,12 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nf_osf_data data; struct tcphdr _tcph; - if (pkt->tprot != IPPROTO_TCP) { + if (nft_pf(pkt) != NFPROTO_IPV4) { + regs->verdict.code = NFT_BREAK; + return; + } + + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } @@ -114,7 +119,6 @@ static int nft_osf_validate(const struct nft_ctx *ctx, switch (ctx->family) { case NFPROTO_IPV4: - case NFPROTO_IPV6: case NFPROTO_INET: hooks = (1 << 
NF_INET_LOCAL_IN) | (1 << NF_INET_PRE_ROUTING) | diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index f2101af8c867..89be443734f6 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, __be16 tport = 0; struct sock *sk; - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } @@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, memset(&taddr, 0, sizeof(taddr)); - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9f837fb5ceb4..4e6708c23922 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO]; struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; + + /* stash area used during netns exit */ + struct list_head dead_tables[NFPROTO_NUMPROTO]; }; struct compat_delta { @@ -477,11 +480,9 @@ int xt_check_proc_name(const char *name, unsigned int size) } EXPORT_SYMBOL(xt_check_proc_name); -int xt_check_match(struct xt_mtchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_match_common(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* @@ -530,6 +531,14 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->proto); return -EINVAL; } + + return 0; +} + +static int xt_checkentry_match(struct xt_mtchk_param *par) +{ + int ret; + if (par->match->checkentry != NULL) { ret = par->match->checkentry(par); if (ret < 0) @@ -538,8 +547,34 @@ int 
xt_check_match(struct xt_mtchk_param *par, /* Flag up potential errors. */ return -EIO; } + + return 0; +} + +int xt_check_hooks_match(struct xt_mtchk_param *par) +{ + if (par->match->check_hooks != NULL) + return par->match->check_hooks(par); + return 0; } +EXPORT_SYMBOL_GPL(xt_check_hooks_match); + +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_match_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_match(par); + if (ret < 0) + return ret; + + return xt_checkentry_match(par); +} EXPORT_SYMBOL_GPL(xt_check_match); /** xt_check_entry_match - check that matches end before start of target @@ -1012,11 +1047,9 @@ bool xt_find_jump_offset(const unsigned int *offsets, } EXPORT_SYMBOL(xt_find_jump_offset); -int xt_check_target(struct xt_tgchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_target_common(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->target->targetsize) != size) { pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, @@ -1061,6 +1094,23 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->proto); return -EINVAL; } + + return 0; +} + +int xt_check_hooks_target(struct xt_tgchk_param *par) +{ + if (par->target->check_hooks != NULL) + return par->target->check_hooks(par); + + return 0; +} +EXPORT_SYMBOL_GPL(xt_check_hooks_target); + +static int xt_checkentry_target(struct xt_tgchk_param *par) +{ + int ret; + if (par->target->checkentry != NULL) { ret = par->target->checkentry(par); if (ret < 0) @@ -1071,6 +1121,22 @@ int xt_check_target(struct xt_tgchk_param *par, } return 0; } + +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_target_common(par, size, proto, inv_proto); + if (ret 
< 0) + return ret; + + ret = xt_check_hooks_target(par); + if (ret < 0) + return ret; + + return xt_checkentry_target(par); +} EXPORT_SYMBOL_GPL(xt_check_target); /** @@ -1409,11 +1475,9 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); -struct xt_table_info * -xt_replace_table(struct xt_table *table, - unsigned int num_counters, - struct xt_table_info *newinfo, - int *error) +static struct xt_table_info * +do_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, int *error) { struct xt_table_info *private; unsigned int cpu; @@ -1468,30 +1532,54 @@ xt_replace_table(struct xt_table *table, } } - audit_log_nfcfg(table->name, table->af, private->number, - !private->number ? AUDIT_XT_OP_REGISTER : - AUDIT_XT_OP_REPLACE, - GFP_KERNEL); + return private; +} + +struct xt_table_info * +xt_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, + int *error) +{ + struct xt_table_info *private; + + private = do_replace_table(table, num_counters, newinfo, error); + if (private) + audit_log_nfcfg(table->name, table->af, private->number, + AUDIT_XT_OP_REPLACE, + GFP_KERNEL); + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); struct xt_table *xt_register_table(struct net *net, const struct xt_table *input_table, + const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t, *table = NULL; + struct nf_hook_ops *ops = NULL; struct xt_table_info *private; - struct xt_table *t, *table; - int ret; + unsigned int num_ops; + int ret = -EINVAL; + + num_ops = hweight32(input_table->valid_hooks); + if (num_ops == 0) + goto out; + + ret = -ENOMEM; + if (template_ops) { + ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); + if (!ops) + goto out; + } /* Don't add one object to multiple lists. 
*/ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); - if (!table) { - ret = -ENOMEM; + if (!table) goto out; - } mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ @@ -1505,7 +1593,7 @@ struct xt_table *xt_register_table(struct net *net, /* Simplifies replace_table code. */ table->private = bootstrap; - if (!xt_replace_table(table, 0, newinfo, &ret)) + if (!do_replace_table(table, 0, newinfo, &ret)) goto unlock; private = table->private; @@ -1514,34 +1602,122 @@ struct xt_table *xt_register_table(struct net *net, /* save number of initial entries */ private->initial_entries = private->number; + if (ops) { + int i; + + for (i = 0; i < num_ops; i++) + ops[i].priv = table; + + ret = nf_register_net_hooks(net, ops, num_ops); + if (ret != 0) { + mutex_unlock(&xt[table->af].mutex); + /* nf_register_net_hooks() might have published a + * base chain before internal error unwind. + */ + synchronize_rcu(); + goto out; + } + + table->ops = ops; + } + + audit_log_nfcfg(table->name, table->af, private->number, + AUDIT_XT_OP_REGISTER, GFP_KERNEL); + list_add(&table->list, &xt_net->tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; unlock: mutex_unlock(&xt[table->af].mutex); - kfree(table); out: + kfree(table); + kfree(ops); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(xt_register_table); -void *xt_unregister_table(struct xt_table *table) +/** + * xt_unregister_table_pre_exit - pre-shutdown unregister of a table + * @net: network namespace + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Unregisters the specified netfilter table from the given network namespace + * and also unregisters the hooks from netfilter core: no new packets will be + * processed. + * + * This must be called prior to xt_unregister_table_exit() from the pernet + * .pre_exit callback. After this call, the table is no longer visible to + * the get/setsockopt path. 
In case of rmmod, module exit path must have + * called xt_unregister_template() prior to unregistering pernet ops to + * prevent re-instantiation of the table. + * + * See also: xt_unregister_table_exit() + */ +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) { - struct xt_table_info *private; + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t; - mutex_lock(&xt[table->af].mutex); - private = table->private; - list_del(&table->list); - mutex_unlock(&xt[table->af].mutex); - audit_log_nfcfg(table->name, table->af, private->number, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); - kfree(table->ops); - kfree(table); + mutex_lock(&xt[af].mutex); + list_for_each_entry(t, &xt_net->tables[af], list) { + if (strcmp(t->name, name) == 0) { + list_move(&t->list, &xt_net->dead_tables[af]); + mutex_unlock(&xt[af].mutex); - return private; + if (t->ops) /* nat table registers with nat core, t->ops is NULL. */ + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; + } + } + mutex_unlock(&xt[af].mutex); +} +EXPORT_SYMBOL(xt_unregister_table_pre_exit); + +/** + * xt_unregister_table_exit - remove a table during namespace teardown + * @net: the network namespace from which to unregister the table + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Completes the unregister process for a table. This must be called from + * the pernet ops .exit callback. This is the second stage after + * xt_unregister_table_pre_exit(). + * + * pair with xt_unregister_table_pre_exit() during namespace shutdown. + * + * Return: the unregistered table or NULL if the table was never + * instantiated. The caller needs to kfree() the table after it + * has removed the family specific matches/targets. 
+ */ +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name) +{ + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *table; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(table, &xt_net->dead_tables[af], list) { + struct nf_hook_ops *ops = NULL; + + if (strcmp(table->name, name) != 0) + continue; + + list_del(&table->list); + + audit_log_nfcfg(table->name, table->af, table->private->number, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + swap(table->ops, ops); + mutex_unlock(&xt[af].mutex); + + kfree(ops); + return table; + } + mutex_unlock(&xt[af].mutex); + + return NULL; } -EXPORT_SYMBOL_GPL(xt_unregister_table); +EXPORT_SYMBOL_GPL(xt_unregister_table_exit); #endif #ifdef CONFIG_PROC_FS @@ -1988,8 +2164,10 @@ static int __net_init xt_net_init(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { INIT_LIST_HEAD(&xt_net->tables[i]); + INIT_LIST_HEAD(&xt_net->dead_tables[i]); + } return 0; } @@ -1998,8 +2176,10 @@ static void __net_exit xt_net_exit(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); + WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i])); + } } static struct pernet_operations xt_net_ops = { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 498f5871c84a..d2aeacf94230 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -354,7 +354,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -366,7 +366,7 @@ 
static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -398,7 +398,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -410,7 +410,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 116a885adb3c..80e1634bc51f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -247,6 +247,21 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static int tcpmss_tg4_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + + return 0; +} + /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { @@ -262,17 +277,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m) static int 
tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -286,17 +293,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -312,6 +311,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV4, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, .targetsize = sizeof(struct xt_tcpmss_info), @@ -322,6 +322,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV6, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, .targetsize = sizeof(struct xt_tcpmss_info), diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e4bea1d346cf..5f60e7298a1e 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -86,6 +86,9 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; + if (par->fragoff) + 
return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -95,6 +98,9 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -106,6 +112,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + unsigned short fragoff = 0; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; @@ -113,8 +120,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index a77088943107..913dbe3aa5e2 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -153,14 +153,10 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_check_hooks(const struct xt_mtchk_param *par) { - const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && @@ -176,6 +172,21 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) goto err; } + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + 
return -EINVAL; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { @@ -211,6 +222,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE @@ -221,6 +233,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index 9520dd00070b..6d1a44ab5eee 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -33,14 +33,10 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } -static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +static int devgroup_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_devgroup_info *info = par->matchinfo; - if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | - XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) - return -EINVAL; - if (info->flags & XT_DEVGROUP_MATCH_SRC && par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | @@ -56,9 +52,21 @@ static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) return 0; } +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const 
struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + return 0; +} + static struct xt_match devgroup_mt_reg __read_mostly = { .name = "devgroup", .match = devgroup_mt, + .check_hooks = devgroup_mt_check_hooks, .checkentry = devgroup_mt_checkentry, .matchsize = sizeof(struct xt_devgroup_info), .family = NFPROTO_UNSPEC, diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index b96e8203ac54..a8503f5d26bf 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -30,6 +30,10 @@ static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) struct tcphdr _tcph; const struct tcphdr *th; + /* this is fine for IPv6 as ecn_mt_check6() enforces -p tcp */ + if (par->fragoff) + return false; + /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3bd127bfc114..2704b4b60d1e 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -658,6 +658,8 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; + if (ntohs(ip_hdr(skb)->frag_off) & IP_OFFSET) + return -1; nexthdr = ip_hdr(skb)->protocol; break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -681,7 +683,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if ((int)protoff < 0) + if ((int)protoff < 0 || ntohs(frag_off) & IP6_OFFSET) return -1; break; } diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 4798cd2ca26e..7fc5156825e4 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -36,25 +36,37 @@ static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par) return 
ret; } -static struct xt_match mac_mt_reg __read_mostly = { - .name = "mac", - .revision = 0, - .family = NFPROTO_UNSPEC, - .match = mac_mt, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD), - .me = THIS_MODULE, +static struct xt_match mac_mt_reg[] __read_mostly = { + { + .name = "mac", + .family = NFPROTO_IPV4, + .match = mac_mt, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), + .me = THIS_MODULE, + }, + { + .name = "mac", + .family = NFPROTO_IPV6, + .match = mac_mt, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), + .me = THIS_MODULE, + }, }; static int __init mac_mt_init(void) { - return xt_register_match(&mac_mt_reg); + return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); } static void __exit mac_mt_exit(void) { - xt_unregister_match(&mac_mt_reg); + xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); } module_init(mac_mt_init); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index dc9485854002..e8807caede68 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -27,6 +27,9 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { + if (p->fragoff) + return false; + return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); } diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 5bfb4843df66..8f2e57b2a586 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -127,26 +127,39 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } -static struct xt_match owner_mt_reg __read_mostly = { - .name = "owner", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = owner_check, - .match = owner_mt, - .matchsize = 
sizeof(struct xt_owner_match_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, +static struct xt_match owner_mt_reg[] __read_mostly = { + { + .name = "owner", + .revision = 1, + .family = NFPROTO_IPV4, + .checkentry = owner_check, + .match = owner_mt, + .matchsize = sizeof(struct xt_owner_match_info), + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "owner", + .revision = 1, + .family = NFPROTO_IPV6, + .checkentry = owner_check, + .match = owner_mt, + .matchsize = sizeof(struct xt_owner_match_info), + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, + } }; static int __init owner_mt_init(void) { - return xt_register_match(&owner_mt_reg); + return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); } static void __exit owner_mt_exit(void) { - xt_unregister_match(&owner_mt_reg); + xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); } module_init(owner_mt_init); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 53997771013f..dd98f758176c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -91,14 +91,10 @@ match_outdev: return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -static int physdev_mt_check(const struct xt_mtchk_param *par) +static int physdev_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - static bool brnf_probed __read_mostly; - if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || - info->bitmask & ~XT_PHYSDEV_OP_MASK) - return -EINVAL; if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -107,6 +103,18 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + return 0; +} + +static int physdev_mt_check(const struct xt_mtchk_param *par) +{ + const struct 
xt_physdev_info *info = par->matchinfo; + static bool brnf_probed __read_mostly; + + if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || + info->bitmask & ~XT_PHYSDEV_OP_MASK) + return -EINVAL; + #define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) if (info->bitmask & XT_PHYSDEV_OP_IN) { if (info->physindev[0] == '\0') @@ -137,24 +145,35 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return 0; } -static struct xt_match physdev_mt_reg __read_mostly = { - .name = "physdev", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = physdev_mt_check, - .match = physdev_mt, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, +static struct xt_match physdev_mt_reg[] __read_mostly = { + { + .name = "physdev", + .family = NFPROTO_IPV4, + .check_hooks = physdev_mt_check_hooks, + .checkentry = physdev_mt_check, + .match = physdev_mt, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, + { + .name = "physdev", + .family = NFPROTO_IPV6, + .check_hooks = physdev_mt_check_hooks, + .checkentry = physdev_mt_check, + .match = physdev_mt, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, }; static int __init physdev_mt_init(void) { - return xt_register_match(&physdev_mt_reg); + return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); } static void __exit physdev_mt_exit(void) { - xt_unregister_match(&physdev_mt_reg); + xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); } module_init(physdev_mt_init); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index cb6e8279010a..ff54e3a8581e 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -63,7 +63,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, return 0; for (i = sp->len - 1; i >= 0; i--) { - pos = strict ? i - sp->len + 1 : 0; + pos = strict ? 
sp->len - i - 1 : 0; if (pos >= info->len) return 0; e = &info->pol[pos]; @@ -126,13 +126,10 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; - const char *errmsg = "neither incoming nor outgoing policy selected"; - - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { @@ -144,6 +141,21 @@ static int policy_mt_check(const struct xt_mtchk_param *par) errmsg = "input policy not valid in POSTROUTING and OUTPUT"; goto err; } + + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int policy_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; + if (info->len > XT_POLICY_MAX_ELEM) { errmsg = "too many policy elements"; goto err; @@ -158,6 +170,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV4, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -166,6 +179,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV6, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 6df485f4403d..61b2f1e58d15 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -33,7 +33,7 @@ static struct xt_match realm_mt_reg __read_mostly = { 
.matchsize = sizeof(struct xt_realm_info), .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .family = NFPROTO_UNSPEC, + .family = NFPROTO_IPV4, .me = THIS_MODULE }; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 731bc2cafae4..4ae04bba9358 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -431,6 +431,29 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) } static int +set_target_v3_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_info_ratelimited("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + (par->hook_mask & ~(1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + } + + return 0; +} + +static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; @@ -459,20 +482,6 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) } if (info->map_set.index != IPSET_INVALID_ID) { - if (strncmp(par->table, "mangle", 7)) { - pr_info_ratelimited("--map-set only usable from mangle table\n"); - ret = -EINVAL; - goto cleanup_del; - } - if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | - (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && - (par->hook_mask & ~(1 << NF_INET_FORWARD | - 1 << NF_INET_LOCAL_OUT | - 1 << NF_INET_POST_ROUTING))) { - pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); - ret = -EINVAL; - goto cleanup_del; - } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == 
IPSET_INVALID_ID) { @@ -672,6 +681,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV4, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE @@ -682,6 +692,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV6, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0d32d4841cb3..b9da8269161d 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -32,6 +32,10 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) u8 _opt[15 * 4 - sizeof(_tcph)]; unsigned int i, optlen; + /* this is fine for IPv6 as xt_tcpmss enforces -p tcp */ + if (par->fragoff) + return false; + /* If we don't have the whole header, drop packet. 
*/ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d251d894afd4..0da39eaed255 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1972,8 +1972,10 @@ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + kfree_skb(skb); return -EINVAL; + } group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group); @@ -1986,8 +1988,10 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + kfree_skb(skb); return; + } group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, info->snd_portid, group, diff --git a/net/netrom/Makefile b/net/netrom/Makefile deleted file mode 100644 index 603e36c9af2e..000000000000 --- a/net/netrom/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the Linux NET/ROM layer. 
-# - -obj-$(CONFIG_NETROM) += netrom.o - -netrom-y := af_netrom.o nr_dev.o nr_in.o nr_loopback.o \ - nr_out.o nr_route.o nr_subr.o nr_timer.o -netrom-$(CONFIG_SYSCTL) += sysctl_net_netrom.o diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c deleted file mode 100644 index 5fc54836dfa8..000000000000 --- a/net/netrom/af_netrom.c +++ /dev/null @@ -1,1536 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) - */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/slab.h> -#include <linux/kernel.h> -#include <linux/sched/signal.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/stat.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <net/netrom.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <net/ip.h> -#include <net/tcp_states.h> -#include <net/arp.h> -#include <linux/init.h> - -static int nr_ndevs = 4; - -int sysctl_netrom_default_path_quality = NR_DEFAULT_QUAL; -int sysctl_netrom_obsolescence_count_initialiser = NR_DEFAULT_OBS; -int sysctl_netrom_network_ttl_initialiser = NR_DEFAULT_TTL; -int sysctl_netrom_transport_timeout = NR_DEFAULT_T1; -int sysctl_netrom_transport_maximum_tries = NR_DEFAULT_N2; -int sysctl_netrom_transport_acknowledge_delay = NR_DEFAULT_T2; -int 
sysctl_netrom_transport_busy_delay = NR_DEFAULT_T4; -int sysctl_netrom_transport_requested_window_size = NR_DEFAULT_WINDOW; -int sysctl_netrom_transport_no_activity_timeout = NR_DEFAULT_IDLE; -int sysctl_netrom_routing_control = NR_DEFAULT_ROUTING; -int sysctl_netrom_link_fails_count = NR_DEFAULT_FAILS; -int sysctl_netrom_reset_circuit = NR_DEFAULT_RESET; - -static unsigned short circuit = 0x101; - -static HLIST_HEAD(nr_list); -static DEFINE_SPINLOCK(nr_list_lock); - -static const struct proto_ops nr_proto_ops; - -/* - * NETROM network devices are virtual network devices encapsulating NETROM - * frames into AX.25 which will be sent through an AX.25 device, so form a - * special "super class" of normal net devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key nr_netdev_xmit_lock_key; -static struct lock_class_key nr_netdev_addr_lock_key; - -static void nr_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); -} - -static void nr_set_lockdep_key(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); -} - -/* - * Socket removal during an interrupt is now safe. - */ -static void nr_remove_socket(struct sock *sk) -{ - spin_lock_bh(&nr_list_lock); - sk_del_node_init(sk); - spin_unlock_bh(&nr_list_lock); -} - -/* - * Kill all bound sockets on a dropped device. - */ -static void nr_kill_by_device(struct net_device *dev) -{ - struct sock *s; - - spin_lock_bh(&nr_list_lock); - sk_for_each(s, &nr_list) - if (nr_sk(s)->device == dev) - nr_disconnect(s, ENETUNREACH); - spin_unlock_bh(&nr_list_lock); -} - -/* - * Handle device status changes. 
- */ -static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - if (event != NETDEV_DOWN) - return NOTIFY_DONE; - - nr_kill_by_device(dev); - nr_rt_device_down(dev); - - return NOTIFY_DONE; -} - -/* - * Add a socket to the bound sockets list. - */ -static void nr_insert_socket(struct sock *sk) -{ - spin_lock_bh(&nr_list_lock); - sk_add_node(sk, &nr_list); - spin_unlock_bh(&nr_list_lock); -} - -/* - * Find a socket that wants to accept the Connect Request we just - * received. - */ -static struct sock *nr_find_listener(ax25_address *addr) -{ - struct sock *s; - - spin_lock_bh(&nr_list_lock); - sk_for_each(s, &nr_list) - if (!ax25cmp(&nr_sk(s)->source_addr, addr) && - s->sk_state == TCP_LISTEN) { - sock_hold(s); - goto found; - } - s = NULL; -found: - spin_unlock_bh(&nr_list_lock); - return s; -} - -/* - * Find a connected NET/ROM socket given my circuit IDs. - */ -static struct sock *nr_find_socket(unsigned char index, unsigned char id) -{ - struct sock *s; - - spin_lock_bh(&nr_list_lock); - sk_for_each(s, &nr_list) { - struct nr_sock *nr = nr_sk(s); - - if (nr->my_index == index && nr->my_id == id) { - sock_hold(s); - goto found; - } - } - s = NULL; -found: - spin_unlock_bh(&nr_list_lock); - return s; -} - -/* - * Find a connected NET/ROM socket given their circuit IDs. - */ -static struct sock *nr_find_peer(unsigned char index, unsigned char id, - ax25_address *dest) -{ - struct sock *s; - - spin_lock_bh(&nr_list_lock); - sk_for_each(s, &nr_list) { - struct nr_sock *nr = nr_sk(s); - - if (nr->your_index == index && nr->your_id == id && - !ax25cmp(&nr->dest_addr, dest)) { - sock_hold(s); - goto found; - } - } - s = NULL; -found: - spin_unlock_bh(&nr_list_lock); - return s; -} - -/* - * Find next free circuit ID. 
- */ -static unsigned short nr_find_next_circuit(void) -{ - unsigned short id = circuit; - unsigned char i, j; - struct sock *sk; - - for (;;) { - i = id / 256; - j = id % 256; - - if (i != 0 && j != 0) { - if ((sk=nr_find_socket(i, j)) == NULL) - break; - sock_put(sk); - } - - id++; - } - - return id; -} - -/* - * Deferred destroy. - */ -void nr_destroy_socket(struct sock *); - -/* - * Handler for deferred kills. - */ -static void nr_destroy_timer(struct timer_list *t) -{ - struct sock *sk = timer_container_of(sk, t, sk_timer); - bh_lock_sock(sk); - sock_hold(sk); - nr_destroy_socket(sk); - bh_unlock_sock(sk); - sock_put(sk); -} - -/* - * This is called from user mode and the timers. Thus it protects itself - * against interrupt users but doesn't worry about being called during - * work. Once it is removed from the queue no interrupt or bottom half - * will touch it and we are (fairly 8-) ) safe. - */ -void nr_destroy_socket(struct sock *sk) -{ - struct sk_buff *skb; - - nr_remove_socket(sk); - - nr_stop_heartbeat(sk); - nr_stop_t1timer(sk); - nr_stop_t2timer(sk); - nr_stop_t4timer(sk); - nr_stop_idletimer(sk); - - nr_clear_queues(sk); /* Flush the queues */ - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (skb->sk != sk) { /* A pending connection */ - /* Queue the unaccepted socket for death */ - sock_set_flag(skb->sk, SOCK_DEAD); - nr_start_heartbeat(skb->sk); - nr_sk(skb->sk)->state = NR_STATE_0; - } - - kfree_skb(skb); - } - - if (sk_has_allocations(sk)) { - /* Defer: outstanding buffers */ - sk->sk_timer.function = nr_destroy_timer; - sk->sk_timer.expires = jiffies + 2 * HZ; - add_timer(&sk->sk_timer); - } else - sock_put(sk); -} - -/* - * Handling for system calls applied via the various interfaces to a - * NET/ROM socket object. 
- */ - -static int nr_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - unsigned int opt; - - if (level != SOL_NETROM) - return -ENOPROTOOPT; - - if (optlen < sizeof(unsigned int)) - return -EINVAL; - - if (copy_from_sockptr(&opt, optval, sizeof(opt))) - return -EFAULT; - - switch (optname) { - case NETROM_T1: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - nr->t1 = opt * HZ; - return 0; - - case NETROM_T2: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - nr->t2 = opt * HZ; - return 0; - - case NETROM_N2: - if (opt < 1 || opt > 31) - return -EINVAL; - nr->n2 = opt; - return 0; - - case NETROM_T4: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - nr->t4 = opt * HZ; - return 0; - - case NETROM_IDLE: - if (opt > UINT_MAX / (60 * HZ)) - return -EINVAL; - nr->idle = opt * 60 * HZ; - return 0; - - default: - return -ENOPROTOOPT; - } -} - -static int nr_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - int val = 0; - int len; - - if (level != SOL_NETROM) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - if (len < 0) - return -EINVAL; - - switch (optname) { - case NETROM_T1: - val = nr->t1 / HZ; - break; - - case NETROM_T2: - val = nr->t2 / HZ; - break; - - case NETROM_N2: - val = nr->n2; - break; - - case NETROM_T4: - val = nr->t4 / HZ; - break; - - case NETROM_IDLE: - val = nr->idle / (60 * HZ); - break; - - default: - return -ENOPROTOOPT; - } - - len = min_t(unsigned int, len, sizeof(int)); - - if (put_user(len, optlen)) - return -EFAULT; - - return copy_to_user(optval, &val, len) ? 
-EFAULT : 0; -} - -static int nr_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - - lock_sock(sk); - if (sock->state != SS_UNCONNECTED) { - release_sock(sk); - return -EINVAL; - } - - if (sk->sk_state != TCP_LISTEN) { - memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); - sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; - release_sock(sk); - return 0; - } - release_sock(sk); - - return -EOPNOTSUPP; -} - -static struct proto nr_proto = { - .name = "NETROM", - .owner = THIS_MODULE, - .obj_size = sizeof(struct nr_sock), -}; - -static int nr_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - struct nr_sock *nr; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - if (sock->type != SOCK_SEQPACKET || protocol != 0) - return -ESOCKTNOSUPPORT; - - sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern); - if (sk == NULL) - return -ENOMEM; - - nr = nr_sk(sk); - - sock_init_data(sock, sk); - - sock->ops = &nr_proto_ops; - sk->sk_protocol = protocol; - - skb_queue_head_init(&nr->ack_queue); - skb_queue_head_init(&nr->reseq_queue); - skb_queue_head_init(&nr->frag_queue); - - nr_init_timers(sk); - - nr->t1 = - msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); - nr->t2 = - msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); - nr->n2 = - msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); - nr->t4 = - msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); - nr->idle = - msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); - nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); - - nr->bpqext = 1; - nr->state = NR_STATE_0; - - return 0; -} - -static struct sock *nr_make_new(struct sock *osk) -{ - struct sock *sk; - struct nr_sock *nr, *onr; - - if (osk->sk_type != SOCK_SEQPACKET) - return NULL; - - sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0); - if (sk == NULL) - return 
NULL; - - nr = nr_sk(sk); - - sock_init_data(NULL, sk); - - sk->sk_type = osk->sk_type; - sk->sk_priority = READ_ONCE(osk->sk_priority); - sk->sk_protocol = osk->sk_protocol; - sk->sk_rcvbuf = osk->sk_rcvbuf; - sk->sk_sndbuf = osk->sk_sndbuf; - sk->sk_state = TCP_ESTABLISHED; - sock_copy_flags(sk, osk); - - skb_queue_head_init(&nr->ack_queue); - skb_queue_head_init(&nr->reseq_queue); - skb_queue_head_init(&nr->frag_queue); - - nr_init_timers(sk); - - onr = nr_sk(osk); - - nr->t1 = onr->t1; - nr->t2 = onr->t2; - nr->n2 = onr->n2; - nr->t4 = onr->t4; - nr->idle = onr->idle; - nr->window = onr->window; - - nr->device = onr->device; - nr->bpqext = onr->bpqext; - - return sk; -} - -static int nr_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr; - - if (sk == NULL) return 0; - - sock_hold(sk); - sock_orphan(sk); - lock_sock(sk); - nr = nr_sk(sk); - - switch (nr->state) { - case NR_STATE_0: - case NR_STATE_1: - case NR_STATE_2: - nr_disconnect(sk, 0); - nr_destroy_socket(sk); - break; - - case NR_STATE_3: - nr_clear_queues(sk); - nr->n2count = 0; - nr_write_internal(sk, NR_DISCREQ); - nr_start_t1timer(sk); - nr_stop_t2timer(sk); - nr_stop_t4timer(sk); - nr_stop_idletimer(sk); - nr->state = NR_STATE_2; - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DESTROY); - break; - - default: - break; - } - - sock->sk = NULL; - release_sock(sk); - sock_put(sk); - - return 0; -} - -static int nr_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; - struct net_device *dev; - ax25_uid_assoc *user; - ax25_address *source; - - lock_sock(sk); - if (!sock_flag(sk, SOCK_ZAPPED)) { - release_sock(sk); - return -EINVAL; - } - if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) { - release_sock(sk); 
- return -EINVAL; - } - if (addr_len < (addr->fsa_ax25.sax25_ndigis * sizeof(ax25_address) + sizeof(struct sockaddr_ax25))) { - release_sock(sk); - return -EINVAL; - } - if (addr->fsa_ax25.sax25_family != AF_NETROM) { - release_sock(sk); - return -EINVAL; - } - if ((dev = nr_dev_get(&addr->fsa_ax25.sax25_call)) == NULL) { - release_sock(sk); - return -EADDRNOTAVAIL; - } - - /* - * Only the super user can set an arbitrary user callsign. - */ - if (addr->fsa_ax25.sax25_ndigis == 1) { - if (!capable(CAP_NET_BIND_SERVICE)) { - dev_put(dev); - release_sock(sk); - return -EPERM; - } - nr->user_addr = addr->fsa_digipeater[0]; - nr->source_addr = addr->fsa_ax25.sax25_call; - } else { - source = &addr->fsa_ax25.sax25_call; - - user = ax25_findbyuid(current_euid()); - if (user) { - nr->user_addr = user->call; - ax25_uid_put(user); - } else { - if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { - release_sock(sk); - dev_put(dev); - return -EPERM; - } - nr->user_addr = *source; - } - - nr->source_addr = *source; - } - - nr->device = dev; - nr_insert_socket(sk); - - sock_reset_flag(sk, SOCK_ZAPPED); - dev_put(dev); - release_sock(sk); - - return 0; -} - -static int nr_connect(struct socket *sock, struct sockaddr_unsized *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; - const ax25_address *source = NULL; - ax25_uid_assoc *user; - struct net_device *dev; - int err = 0; - - lock_sock(sk); - if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { - sock->state = SS_CONNECTED; - goto out_release; /* Connect completed during a ERESTARTSYS event */ - } - - if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { - sock->state = SS_UNCONNECTED; - err = -ECONNREFUSED; - goto out_release; - } - - if (sk->sk_state == TCP_ESTABLISHED) { - err = -EISCONN; /* No reconnect on a seqpacket socket */ - goto out_release; - } - - if (sock->state == 
SS_CONNECTING) { - err = -EALREADY; - goto out_release; - } - - sk->sk_state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; - - if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) { - err = -EINVAL; - goto out_release; - } - if (addr->sax25_family != AF_NETROM) { - err = -EINVAL; - goto out_release; - } - if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ - sock_reset_flag(sk, SOCK_ZAPPED); - - if ((dev = nr_dev_first()) == NULL) { - err = -ENETUNREACH; - goto out_release; - } - source = (const ax25_address *)dev->dev_addr; - - user = ax25_findbyuid(current_euid()); - if (user) { - nr->user_addr = user->call; - ax25_uid_put(user); - } else { - if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) { - dev_put(dev); - err = -EPERM; - goto out_release; - } - nr->user_addr = *source; - } - - nr->source_addr = *source; - nr->device = dev; - - dev_put(dev); - nr_insert_socket(sk); /* Finish the bind */ - } - - nr->dest_addr = addr->sax25_call; - - release_sock(sk); - circuit = nr_find_next_circuit(); - lock_sock(sk); - - nr->my_index = circuit / 256; - nr->my_id = circuit % 256; - - circuit++; - - /* Move to connecting socket, start sending Connect Requests */ - sock->state = SS_CONNECTING; - sk->sk_state = TCP_SYN_SENT; - - nr_establish_data_link(sk); - - nr->state = NR_STATE_1; - - nr_start_heartbeat(sk); - - /* Now the loop */ - if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { - err = -EINPROGRESS; - goto out_release; - } - - /* - * A Connect Ack with Choke or timeout or failed routing will go to - * closed. 
- */ - if (sk->sk_state == TCP_SYN_SENT) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - if (sk->sk_state != TCP_SYN_SENT) - break; - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - if (err) - goto out_release; - } - - if (sk->sk_state != TCP_ESTABLISHED) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); /* Always set at this point */ - goto out_release; - } - - sock->state = SS_CONNECTED; - -out_release: - release_sock(sk); - - return err; -} - -static int nr_accept(struct socket *sock, struct socket *newsock, - struct proto_accept_arg *arg) -{ - struct sk_buff *skb; - struct sock *newsk; - DEFINE_WAIT(wait); - struct sock *sk; - int err = 0; - - if ((sk = sock->sk) == NULL) - return -EINVAL; - - lock_sock(sk); - if (sk->sk_type != SOCK_SEQPACKET) { - err = -EOPNOTSUPP; - goto out_release; - } - - if (sk->sk_state != TCP_LISTEN) { - err = -EINVAL; - goto out_release; - } - - /* - * The write queue this time is holding sockets ready to use - * hooked into the SABM we saved - */ - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb) - break; - - if (arg->flags & O_NONBLOCK) { - err = -EWOULDBLOCK; - break; - } - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - if (err) - goto out_release; - - newsk = skb->sk; - sock_graft(newsk, newsock); - - /* Now attach up the new socket */ - kfree_skb(skb); - sk_acceptq_removed(sk); - -out_release: - release_sock(sk); - - return err; -} - -static int nr_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - 
int uaddr_len; - - memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25)); - - lock_sock(sk); - if (peer != 0) { - if (sk->sk_state != TCP_ESTABLISHED) { - release_sock(sk); - return -ENOTCONN; - } - sax->fsa_ax25.sax25_family = AF_NETROM; - sax->fsa_ax25.sax25_ndigis = 1; - sax->fsa_ax25.sax25_call = nr->user_addr; - memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); - sax->fsa_digipeater[0] = nr->dest_addr; - uaddr_len = sizeof(struct full_sockaddr_ax25); - } else { - sax->fsa_ax25.sax25_family = AF_NETROM; - sax->fsa_ax25.sax25_ndigis = 0; - sax->fsa_ax25.sax25_call = nr->source_addr; - uaddr_len = sizeof(struct sockaddr_ax25); - } - release_sock(sk); - - return uaddr_len; -} - -int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) -{ - struct sock *sk; - struct sock *make; - struct nr_sock *nr_make; - ax25_address *src, *dest, *user; - unsigned short circuit_index, circuit_id; - unsigned short peer_circuit_index, peer_circuit_id; - unsigned short frametype, flags, window, timeout; - int ret; - - skb_orphan(skb); - - /* - * skb->data points to the netrom frame start - */ - - src = (ax25_address *)(skb->data + 0); - dest = (ax25_address *)(skb->data + 7); - - circuit_index = skb->data[15]; - circuit_id = skb->data[16]; - peer_circuit_index = skb->data[17]; - peer_circuit_id = skb->data[18]; - frametype = skb->data[19] & 0x0F; - flags = skb->data[19] & 0xF0; - - /* - * Check for an incoming IP over NET/ROM frame. - */ - if (frametype == NR_PROTOEXT && - circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { - skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); - skb_reset_transport_header(skb); - - return nr_rx_ip(skb, dev); - } - - /* - * Find an existing socket connection, based on circuit ID, if it's - * a Connect Request base it on their circuit ID. - * - * Circuit ID 0/0 is not valid but it could still be a "reset" for a - * circuit that no longer exists at the other end ... 
- */ - - sk = NULL; - - if (circuit_index == 0 && circuit_id == 0) { - if (frametype == NR_CONNACK && flags == NR_CHOKE_FLAG) - sk = nr_find_peer(peer_circuit_index, peer_circuit_id, src); - } else { - if (frametype == NR_CONNREQ) - sk = nr_find_peer(circuit_index, circuit_id, src); - else - sk = nr_find_socket(circuit_index, circuit_id); - } - - if (sk != NULL) { - bh_lock_sock(sk); - skb_reset_transport_header(skb); - - if (frametype == NR_CONNACK && skb->len == 22) - nr_sk(sk)->bpqext = 1; - else - nr_sk(sk)->bpqext = 0; - - ret = nr_process_rx_frame(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - return ret; - } - - /* - * Now it should be a CONNREQ. - */ - if (frametype != NR_CONNREQ) { - /* - * Here it would be nice to be able to send a reset but - * NET/ROM doesn't have one. We've tried to extend the protocol - * by sending NR_CONNACK | NR_CHOKE_FLAGS replies but that - * apparently kills BPQ boxes... :-( - * So now we try to follow the established behaviour of - * G8PZT's Xrouter which is sending packets with command type 7 - * as an extension of the protocol. 
- */ - if (READ_ONCE(sysctl_netrom_reset_circuit) && - (frametype != NR_RESET || flags != 0)) - nr_transmit_reset(skb, 1); - - return 0; - } - - sk = nr_find_listener(dest); - - user = (ax25_address *)(skb->data + 21); - - if (sk == NULL || sk_acceptq_is_full(sk) || - (make = nr_make_new(sk)) == NULL) { - nr_transmit_refusal(skb, 0); - if (sk) - sock_put(sk); - return 0; - } - - bh_lock_sock(sk); - - window = skb->data[20]; - - sock_hold(make); - skb->sk = make; - skb->destructor = sock_efree; - make->sk_state = TCP_ESTABLISHED; - - /* Fill in his circuit details */ - nr_make = nr_sk(make); - nr_make->source_addr = *dest; - nr_make->dest_addr = *src; - nr_make->user_addr = *user; - - nr_make->your_index = circuit_index; - nr_make->your_id = circuit_id; - - bh_unlock_sock(sk); - circuit = nr_find_next_circuit(); - bh_lock_sock(sk); - - nr_make->my_index = circuit / 256; - nr_make->my_id = circuit % 256; - - circuit++; - - /* Window negotiation */ - if (window < nr_make->window) - nr_make->window = window; - - /* L4 timeout negotiation */ - if (skb->len == 37) { - timeout = skb->data[36] * 256 + skb->data[35]; - if (timeout * HZ < nr_make->t1) - nr_make->t1 = timeout * HZ; - nr_make->bpqext = 1; - } else { - nr_make->bpqext = 0; - } - - nr_write_internal(make, NR_CONNACK); - - nr_make->condition = 0x00; - nr_make->vs = 0; - nr_make->va = 0; - nr_make->vr = 0; - nr_make->vl = 0; - nr_make->state = NR_STATE_3; - sk_acceptq_added(sk); - skb_queue_head(&sk->sk_receive_queue, skb); - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); - - bh_unlock_sock(sk); - sock_put(sk); - - nr_insert_socket(make); - - nr_start_heartbeat(make); - nr_start_idletimer(make); - - return 1; -} - -static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct nr_sock *nr = nr_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); - int err; - struct sockaddr_ax25 sax; - struct sk_buff *skb; - unsigned char *asmptr; 
- int size; - - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) - return -EINVAL; - - lock_sock(sk); - if (sock_flag(sk, SOCK_ZAPPED)) { - err = -EADDRNOTAVAIL; - goto out; - } - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - err = -EPIPE; - goto out; - } - - if (nr->device == NULL) { - err = -ENETUNREACH; - goto out; - } - - if (usax) { - if (msg->msg_namelen < sizeof(sax)) { - err = -EINVAL; - goto out; - } - sax = *usax; - if (ax25cmp(&nr->dest_addr, &sax.sax25_call) != 0) { - err = -EISCONN; - goto out; - } - if (sax.sax25_family != AF_NETROM) { - err = -EINVAL; - goto out; - } - } else { - if (sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - sax.sax25_family = AF_NETROM; - sax.sax25_call = nr->dest_addr; - } - - /* Build a packet - the conventional user limit is 236 bytes. We can - do ludicrously large NetROM frames but must not overflow */ - if (len > 65536) { - err = -EMSGSIZE; - goto out; - } - - size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; - - if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) - goto out; - - skb_reserve(skb, size - len); - skb_reset_transport_header(skb); - - /* - * Push down the NET/ROM header - */ - - asmptr = skb_push(skb, NR_TRANSPORT_LEN); - - /* Build a NET/ROM Transport header */ - - *asmptr++ = nr->your_index; - *asmptr++ = nr->your_id; - *asmptr++ = 0; /* To be filled in later */ - *asmptr++ = 0; /* Ditto */ - *asmptr++ = NR_INFO; - - /* - * Put the data on the end - */ - skb_put(skb, len); - - /* User data follows immediately after the NET/ROM transport header */ - if (memcpy_from_msg(skb_transport_header(skb), msg, len)) { - kfree_skb(skb); - err = -EFAULT; - goto out; - } - - if (sk->sk_state != TCP_ESTABLISHED) { - kfree_skb(skb); - err = -ENOTCONN; - goto out; - } - - nr_output(sk, skb); /* Shove it onto the queue */ - - err = len; -out: - release_sock(sk); - return err; -} - -static int nr_recvmsg(struct socket *sock, 
struct msghdr *msg, size_t size, - int flags) -{ - struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); - size_t copied; - struct sk_buff *skb; - int er; - - /* - * This works for seqpacket too. The receiver has ordered the queue for - * us! We do one quick check first though - */ - - lock_sock(sk); - if (sk->sk_state != TCP_ESTABLISHED) { - release_sock(sk); - return -ENOTCONN; - } - - /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags, &er); - if (!skb) { - release_sock(sk); - return er; - } - - skb_reset_transport_header(skb); - copied = skb->len; - - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - - er = skb_copy_datagram_msg(skb, 0, msg, copied); - if (er < 0) { - skb_free_datagram(sk, skb); - release_sock(sk); - return er; - } - - if (sax != NULL) { - memset(sax, 0, sizeof(*sax)); - sax->sax25_family = AF_NETROM; - skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, - AX25_ADDR_LEN); - msg->msg_namelen = sizeof(*sax); - } - - skb_free_datagram(sk, skb); - - release_sock(sk); - return copied; -} - - -static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - void __user *argp = (void __user *)arg; - - switch (cmd) { - case TIOCOUTQ: { - long amount; - - lock_sock(sk); - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - release_sock(sk); - return put_user(amount, (int __user *)argp); - } - - case TIOCINQ: { - struct sk_buff *skb; - long amount = 0L; - - lock_sock(sk); - /* These two are safe on a single CPU system as only user tasks fiddle here */ - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - release_sock(sk); - return put_user(amount, (int __user *)argp); - } - - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case 
SIOCGIFMETRIC: - case SIOCSIFMETRIC: - return -EINVAL; - - case SIOCADDRT: - case SIOCDELRT: - case SIOCNRDECOBS: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - return nr_rt_ioctl(cmd, argp); - - default: - return -ENOIOCTLCMD; - } - - return 0; -} - -#ifdef CONFIG_PROC_FS - -static void *nr_info_start(struct seq_file *seq, loff_t *pos) - __acquires(&nr_list_lock) -{ - spin_lock_bh(&nr_list_lock); - return seq_hlist_start_head(&nr_list, *pos); -} - -static void *nr_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &nr_list, pos); -} - -static void nr_info_stop(struct seq_file *seq, void *v) - __releases(&nr_list_lock) -{ - spin_unlock_bh(&nr_list_lock); -} - -static int nr_info_show(struct seq_file *seq, void *v) -{ - struct sock *s = sk_entry(v); - struct net_device *dev; - struct nr_sock *nr; - const char *devname; - char buf[11]; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, -"user_addr dest_node src_node dev my your st vs vr va t1 t2 t4 idle n2 wnd Snd-Q Rcv-Q inode\n"); - - else { - - bh_lock_sock(s); - nr = nr_sk(s); - - if ((dev = nr->device) == NULL) - devname = "???"; - else - devname = dev->name; - - seq_printf(seq, "%-9s ", ax2asc(buf, &nr->user_addr)); - seq_printf(seq, "%-9s ", ax2asc(buf, &nr->dest_addr)); - seq_printf(seq, -"%-9s %-3s %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %llu\n", - ax2asc(buf, &nr->source_addr), - devname, - nr->my_index, - nr->my_id, - nr->your_index, - nr->your_id, - nr->state, - nr->vs, - nr->vr, - nr->va, - ax25_display_timer(&nr->t1timer) / HZ, - nr->t1 / HZ, - ax25_display_timer(&nr->t2timer) / HZ, - nr->t2 / HZ, - ax25_display_timer(&nr->t4timer) / HZ, - nr->t4 / HZ, - ax25_display_timer(&nr->idletimer) / (60 * HZ), - nr->idle / (60 * HZ), - nr->n2count, - nr->n2, - nr->window, - sk_wmem_alloc_get(s), - sk_rmem_alloc_get(s), - s->sk_socket ? 
SOCK_INODE(s->sk_socket)->i_ino : (u64)0); - - bh_unlock_sock(s); - } - return 0; -} - -static const struct seq_operations nr_info_seqops = { - .start = nr_info_start, - .next = nr_info_next, - .stop = nr_info_stop, - .show = nr_info_show, -}; -#endif /* CONFIG_PROC_FS */ - -static const struct net_proto_family nr_family_ops = { - .family = PF_NETROM, - .create = nr_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops nr_proto_ops = { - .family = PF_NETROM, - .owner = THIS_MODULE, - .release = nr_release, - .bind = nr_bind, - .connect = nr_connect, - .socketpair = sock_no_socketpair, - .accept = nr_accept, - .getname = nr_getname, - .poll = datagram_poll, - .ioctl = nr_ioctl, - .gettstamp = sock_gettstamp, - .listen = nr_listen, - .shutdown = sock_no_shutdown, - .setsockopt = nr_setsockopt, - .getsockopt = nr_getsockopt, - .sendmsg = nr_sendmsg, - .recvmsg = nr_recvmsg, - .mmap = sock_no_mmap, -}; - -static struct notifier_block nr_dev_notifier = { - .notifier_call = nr_device_event, -}; - -static struct net_device **dev_nr; - -static struct ax25_protocol nr_pid = { - .pid = AX25_P_NETROM, - .func = nr_route_frame -}; - -static struct ax25_linkfail nr_linkfail_notifier = { - .func = nr_link_failed, -}; - -static int __init nr_proto_init(void) -{ - int i; - int rc = proto_register(&nr_proto, 0); - - if (rc) - return rc; - - if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", - __func__); - rc = -EINVAL; - goto unregister_proto; - } - - dev_nr = kzalloc_objs(struct net_device *, nr_ndevs); - if (!dev_nr) { - pr_err("NET/ROM: %s - unable to allocate device array\n", - __func__); - rc = -ENOMEM; - goto unregister_proto; - } - - for (i = 0; i < nr_ndevs; i++) { - char name[IFNAMSIZ]; - struct net_device *dev; - - sprintf(name, "nr%d", i); - dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); - if (!dev) { - rc = -ENOMEM; - goto fail; - } - - dev->base_addr = i; - rc = 
register_netdev(dev); - if (rc) { - free_netdev(dev); - goto fail; - } - nr_set_lockdep_key(dev); - dev_nr[i] = dev; - } - - rc = sock_register(&nr_family_ops); - if (rc) - goto fail; - - rc = register_netdevice_notifier(&nr_dev_notifier); - if (rc) - goto out_sock; - - ax25_register_pid(&nr_pid); - ax25_linkfail_register(&nr_linkfail_notifier); - -#ifdef CONFIG_SYSCTL - rc = nr_register_sysctl(); - if (rc) - goto out_sysctl; -#endif - - nr_loopback_init(); - - rc = -ENOMEM; - if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) - goto proc_remove1; - if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, - &nr_neigh_seqops)) - goto proc_remove2; - if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, - &nr_node_seqops)) - goto proc_remove3; - - return 0; - -proc_remove3: - remove_proc_entry("nr_neigh", init_net.proc_net); -proc_remove2: - remove_proc_entry("nr", init_net.proc_net); -proc_remove1: - - nr_loopback_clear(); - nr_rt_free(); - -#ifdef CONFIG_SYSCTL - nr_unregister_sysctl(); -out_sysctl: -#endif - ax25_linkfail_release(&nr_linkfail_notifier); - ax25_protocol_release(AX25_P_NETROM); - unregister_netdevice_notifier(&nr_dev_notifier); -out_sock: - sock_unregister(PF_NETROM); -fail: - while (--i >= 0) { - unregister_netdev(dev_nr[i]); - free_netdev(dev_nr[i]); - } - kfree(dev_nr); -unregister_proto: - proto_unregister(&nr_proto); - return rc; -} - -module_init(nr_proto_init); - -module_param(nr_ndevs, int, 0); -MODULE_PARM_DESC(nr_ndevs, "number of NET/ROM devices"); - -MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>"); -MODULE_DESCRIPTION("The amateur radio NET/ROM network and transport layer protocol"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_NETROM); - -static void __exit nr_exit(void) -{ - int i; - - remove_proc_entry("nr", init_net.proc_net); - remove_proc_entry("nr_neigh", init_net.proc_net); - remove_proc_entry("nr_nodes", init_net.proc_net); - nr_loopback_clear(); - - nr_rt_free(); - -#ifdef 
CONFIG_SYSCTL - nr_unregister_sysctl(); -#endif - - ax25_linkfail_release(&nr_linkfail_notifier); - ax25_protocol_release(AX25_P_NETROM); - - unregister_netdevice_notifier(&nr_dev_notifier); - - sock_unregister(PF_NETROM); - - for (i = 0; i < nr_ndevs; i++) { - struct net_device *dev = dev_nr[i]; - if (dev) { - unregister_netdev(dev); - free_netdev(dev); - } - } - - kfree(dev_nr); - proto_unregister(&nr_proto); -} -module_exit(nr_exit); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c deleted file mode 100644 index 2c34389c3ce6..000000000000 --- a/net/netrom/nr_dev.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/sysctl.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/in.h> -#include <linux/if_ether.h> /* For the statistics structure. */ -#include <linux/slab.h> -#include <linux/uaccess.h> - -#include <asm/io.h> - -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> - -#include <net/ip.h> -#include <net/arp.h> - -#include <net/ax25.h> -#include <net/netrom.h> - -/* - * Only allow IP over NET/ROM frames through if the netrom device is up. 
- */ - -int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) -{ - struct net_device_stats *stats = &dev->stats; - - if (!netif_running(dev)) { - stats->rx_dropped++; - return 0; - } - - stats->rx_packets++; - stats->rx_bytes += skb->len; - - skb->protocol = htons(ETH_P_IP); - - /* Spoof incoming device */ - skb->dev = dev; - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->pkt_type = PACKET_HOST; - - netif_rx(skb); - - return 1; -} - -static int nr_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned int len) -{ - unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); - - memcpy(buff, (saddr != NULL) ? saddr : dev->dev_addr, dev->addr_len); - buff[6] &= ~AX25_CBIT; - buff[6] &= ~AX25_EBIT; - buff[6] |= AX25_SSSID_SPARE; - buff += AX25_ADDR_LEN; - - if (daddr != NULL) - memcpy(buff, daddr, dev->addr_len); - buff[6] &= ~AX25_CBIT; - buff[6] |= AX25_EBIT; - buff[6] |= AX25_SSSID_SPARE; - buff += AX25_ADDR_LEN; - - *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); - - *buff++ = NR_PROTO_IP; - *buff++ = NR_PROTO_IP; - *buff++ = 0; - *buff++ = 0; - *buff++ = NR_PROTOEXT; - - if (daddr != NULL) - return 37; - - return -37; -} - -static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sa = addr; - int err; - - if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) - return 0; - - if (dev->flags & IFF_UP) { - err = ax25_listen_register((ax25_address *)sa->sa_data, NULL); - if (err) - return err; - - ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); - } - - dev_addr_set(dev, sa->sa_data); - - return 0; -} - -static int nr_open(struct net_device *dev) -{ - int err; - - err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); - if (err) - return err; - - netif_start_queue(dev); - - return 0; -} - -static int nr_close(struct net_device *dev) -{ - ax25_listen_release((const 
ax25_address *)dev->dev_addr, NULL); - netif_stop_queue(dev); - return 0; -} - -static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct net_device_stats *stats = &dev->stats; - unsigned int len = skb->len; - - if (!nr_route_frame(skb, NULL)) { - kfree_skb(skb); - stats->tx_errors++; - return NETDEV_TX_OK; - } - - stats->tx_packets++; - stats->tx_bytes += len; - - return NETDEV_TX_OK; -} - -static const struct header_ops nr_header_ops = { - .create = nr_header, -}; - -static const struct net_device_ops nr_netdev_ops = { - .ndo_open = nr_open, - .ndo_stop = nr_close, - .ndo_start_xmit = nr_xmit, - .ndo_set_mac_address = nr_set_mac_address, -}; - -void nr_setup(struct net_device *dev) -{ - dev->mtu = NR_MAX_PACKET_SIZE; - dev->netdev_ops = &nr_netdev_ops; - dev->header_ops = &nr_header_ops; - dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; - dev->addr_len = AX25_ADDR_LEN; - dev->type = ARPHRD_NETROM; - - /* New-style flags. */ - dev->flags = IFF_NOARP; -} diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c deleted file mode 100644 index 97944db6b5ac..000000000000 --- a/net/netrom/nr_in.c +++ /dev/null @@ -1,301 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/netrom.h> - -static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) -{ - struct sk_buff 
*skbo, *skbn = skb; - struct nr_sock *nr = nr_sk(sk); - - skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); - - nr_start_idletimer(sk); - - if (more) { - nr->fraglen += skb->len; - skb_queue_tail(&nr->frag_queue, skb); - return 0; - } - - if (!more && nr->fraglen > 0) { /* End of fragment */ - nr->fraglen += skb->len; - skb_queue_tail(&nr->frag_queue, skb); - - if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL) - return 1; - - skb_reset_transport_header(skbn); - - while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) { - skb_copy_from_linear_data(skbo, - skb_put(skbn, skbo->len), - skbo->len); - kfree_skb(skbo); - } - - nr->fraglen = 0; - } - - return sock_queue_rcv_skb(sk, skbn); -} - -/* - * State machine for state 1, Awaiting Connection State. - * The handling of the timer(s) is in file nr_timer.c. - * Handling of state 0 and connection release is in netrom.c. - */ -static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, - int frametype) -{ - switch (frametype) { - case NR_CONNACK: { - struct nr_sock *nr = nr_sk(sk); - - nr_stop_t1timer(sk); - nr_start_idletimer(sk); - nr->your_index = skb->data[17]; - nr->your_id = skb->data[18]; - nr->vs = 0; - nr->va = 0; - nr->vr = 0; - nr->vl = 0; - nr->state = NR_STATE_3; - nr->n2count = 0; - nr->window = skb->data[20]; - sk->sk_state = TCP_ESTABLISHED; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - break; - } - - case NR_CONNACK | NR_CHOKE_FLAG: - nr_disconnect(sk, ECONNREFUSED); - break; - - case NR_RESET: - if (READ_ONCE(sysctl_netrom_reset_circuit)) - nr_disconnect(sk, ECONNRESET); - break; - - default: - break; - } - return 0; -} - -/* - * State machine for state 2, Awaiting Release State. - * The handling of the timer(s) is in file nr_timer.c - * Handling of state 0 and connection release is in netrom.c. 
- */ -static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, - int frametype) -{ - switch (frametype) { - case NR_CONNACK | NR_CHOKE_FLAG: - nr_disconnect(sk, ECONNRESET); - break; - - case NR_DISCREQ: - nr_write_internal(sk, NR_DISCACK); - fallthrough; - case NR_DISCACK: - nr_disconnect(sk, 0); - break; - - case NR_RESET: - if (READ_ONCE(sysctl_netrom_reset_circuit)) - nr_disconnect(sk, ECONNRESET); - break; - - default: - break; - } - return 0; -} - -/* - * State machine for state 3, Connected State. - * The handling of the timer(s) is in file nr_timer.c - * Handling of state 0 and connection release is in netrom.c. - */ -static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype) -{ - struct nr_sock *nrom = nr_sk(sk); - struct sk_buff_head temp_queue; - struct sk_buff *skbn; - unsigned short save_vr; - unsigned short nr, ns; - int queued = 0; - - nr = skb->data[18]; - - switch (frametype) { - case NR_CONNREQ: - nr_write_internal(sk, NR_CONNACK); - break; - - case NR_DISCREQ: - nr_write_internal(sk, NR_DISCACK); - nr_disconnect(sk, 0); - break; - - case NR_CONNACK | NR_CHOKE_FLAG: - case NR_DISCACK: - nr_disconnect(sk, ECONNRESET); - break; - - case NR_INFOACK: - case NR_INFOACK | NR_CHOKE_FLAG: - case NR_INFOACK | NR_NAK_FLAG: - case NR_INFOACK | NR_NAK_FLAG | NR_CHOKE_FLAG: - if (frametype & NR_CHOKE_FLAG) { - nrom->condition |= NR_COND_PEER_RX_BUSY; - nr_start_t4timer(sk); - } else { - nrom->condition &= ~NR_COND_PEER_RX_BUSY; - nr_stop_t4timer(sk); - } - if (!nr_validate_nr(sk, nr)) { - break; - } - if (frametype & NR_NAK_FLAG) { - nr_frames_acked(sk, nr); - nr_send_nak_frame(sk); - } else { - if (nrom->condition & NR_COND_PEER_RX_BUSY) { - nr_frames_acked(sk, nr); - } else { - nr_check_iframes_acked(sk, nr); - } - } - break; - - case NR_INFO: - case NR_INFO | NR_NAK_FLAG: - case NR_INFO | NR_CHOKE_FLAG: - case NR_INFO | NR_MORE_FLAG: - case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG: - case NR_INFO | NR_CHOKE_FLAG | NR_MORE_FLAG: 
- case NR_INFO | NR_NAK_FLAG | NR_MORE_FLAG: - case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG | NR_MORE_FLAG: - if (frametype & NR_CHOKE_FLAG) { - nrom->condition |= NR_COND_PEER_RX_BUSY; - nr_start_t4timer(sk); - } else { - nrom->condition &= ~NR_COND_PEER_RX_BUSY; - nr_stop_t4timer(sk); - } - if (nr_validate_nr(sk, nr)) { - if (frametype & NR_NAK_FLAG) { - nr_frames_acked(sk, nr); - nr_send_nak_frame(sk); - } else { - if (nrom->condition & NR_COND_PEER_RX_BUSY) { - nr_frames_acked(sk, nr); - } else { - nr_check_iframes_acked(sk, nr); - } - } - } - queued = 1; - skb_queue_head(&nrom->reseq_queue, skb); - if (nrom->condition & NR_COND_OWN_RX_BUSY) - break; - skb_queue_head_init(&temp_queue); - do { - save_vr = nrom->vr; - while ((skbn = skb_dequeue(&nrom->reseq_queue)) != NULL) { - ns = skbn->data[17]; - if (ns == nrom->vr) { - if (nr_queue_rx_frame(sk, skbn, frametype & NR_MORE_FLAG) == 0) { - nrom->vr = (nrom->vr + 1) % NR_MODULUS; - } else { - nrom->condition |= NR_COND_OWN_RX_BUSY; - skb_queue_tail(&temp_queue, skbn); - } - } else if (nr_in_rx_window(sk, ns)) { - skb_queue_tail(&temp_queue, skbn); - } else { - kfree_skb(skbn); - } - } - while ((skbn = skb_dequeue(&temp_queue)) != NULL) { - skb_queue_tail(&nrom->reseq_queue, skbn); - } - } while (save_vr != nrom->vr); - /* - * Window is full, ack it immediately. 
- */ - if (((nrom->vl + nrom->window) % NR_MODULUS) == nrom->vr) { - nr_enquiry_response(sk); - } else { - if (!(nrom->condition & NR_COND_ACK_PENDING)) { - nrom->condition |= NR_COND_ACK_PENDING; - nr_start_t2timer(sk); - } - } - break; - - case NR_RESET: - if (READ_ONCE(sysctl_netrom_reset_circuit)) - nr_disconnect(sk, ECONNRESET); - break; - - default: - break; - } - return queued; -} - -/* Higher level upcall for a LAPB frame - called with sk locked */ -int nr_process_rx_frame(struct sock *sk, struct sk_buff *skb) -{ - struct nr_sock *nr = nr_sk(sk); - int queued = 0, frametype; - - if (nr->state == NR_STATE_0) - return 0; - - frametype = skb->data[19]; - - switch (nr->state) { - case NR_STATE_1: - queued = nr_state1_machine(sk, skb, frametype); - break; - case NR_STATE_2: - queued = nr_state2_machine(sk, skb, frametype); - break; - case NR_STATE_3: - queued = nr_state3_machine(sk, skb, frametype); - break; - } - - nr_kick(sk); - - return queued; -} diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c deleted file mode 100644 index 7a9d765b30c0..000000000000 --- a/net/netrom/nr_loopback.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi) - */ -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/socket.h> -#include <linux/timer.h> -#include <net/ax25.h> -#include <linux/skbuff.h> -#include <net/netrom.h> -#include <linux/init.h> - -static void nr_loopback_timer(struct timer_list *); - -static struct sk_buff_head loopback_queue; -static DEFINE_TIMER(loopback_timer, nr_loopback_timer); - -void __init nr_loopback_init(void) -{ - skb_queue_head_init(&loopback_queue); -} - -static inline int nr_loopback_running(void) -{ - return timer_pending(&loopback_timer); -} - -int nr_loopback_queue(struct sk_buff *skb) -{ - struct sk_buff *skbn; - - if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) { - skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), 
skb->len); - skb_reset_transport_header(skbn); - - skb_queue_tail(&loopback_queue, skbn); - - if (!nr_loopback_running()) - mod_timer(&loopback_timer, jiffies + 10); - } - - kfree_skb(skb); - return 1; -} - -static void nr_loopback_timer(struct timer_list *unused) -{ - struct sk_buff *skb; - ax25_address *nr_dest; - struct net_device *dev; - - if ((skb = skb_dequeue(&loopback_queue)) != NULL) { - nr_dest = (ax25_address *)(skb->data + 7); - - dev = nr_dev_get(nr_dest); - - if (dev == NULL || nr_rx_frame(skb, dev) == 0) - kfree_skb(skb); - - dev_put(dev); - - if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running()) - mod_timer(&loopback_timer, jiffies + 10); - } -} - -void nr_loopback_clear(void) -{ - timer_delete_sync(&loopback_timer); - skb_queue_purge(&loopback_queue); -} diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c deleted file mode 100644 index 2b3cbceb0b52..000000000000 --- a/net/netrom/nr_out.c +++ /dev/null @@ -1,272 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/netrom.h> - -/* - * This is where all NET/ROM frames pass, except for IP-over-NET/ROM which - * cannot be fragmented in this manner. 
- */ -void nr_output(struct sock *sk, struct sk_buff *skb) -{ - struct sk_buff *skbn; - unsigned char transport[NR_TRANSPORT_LEN]; - int err, frontlen, len; - - if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) { - /* Save a copy of the Transport Header */ - skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN); - skb_pull(skb, NR_TRANSPORT_LEN); - - frontlen = skb_headroom(skb); - - while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) { - kfree_skb(skb); - return; - } - - skb_reserve(skbn, frontlen); - - len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE; - - /* Copy the user data */ - skb_copy_from_linear_data(skb, skb_put(skbn, len), len); - skb_pull(skb, len); - - /* Duplicate the Transport Header */ - skb_push(skbn, NR_TRANSPORT_LEN); - skb_copy_to_linear_data(skbn, transport, - NR_TRANSPORT_LEN); - if (skb->len > 0) - skbn->data[4] |= NR_MORE_FLAG; - - skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */ - } - - kfree_skb(skb); - } else { - skb_queue_tail(&sk->sk_write_queue, skb); /* Throw it on the queue */ - } - - nr_kick(sk); -} - -/* - * This procedure is passed a buffer descriptor for an iframe. It builds - * the rest of the control part of the frame and then writes it out. 
- */ -static void nr_send_iframe(struct sock *sk, struct sk_buff *skb) -{ - struct nr_sock *nr = nr_sk(sk); - - if (skb == NULL) - return; - - skb->data[2] = nr->vs; - skb->data[3] = nr->vr; - - if (nr->condition & NR_COND_OWN_RX_BUSY) - skb->data[4] |= NR_CHOKE_FLAG; - - nr_start_idletimer(sk); - - nr_transmit_buffer(sk, skb); -} - -void nr_send_nak_frame(struct sock *sk) -{ - struct sk_buff *skb, *skbn; - struct nr_sock *nr = nr_sk(sk); - - if ((skb = skb_peek(&nr->ack_queue)) == NULL) - return; - - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) - return; - - skbn->data[2] = nr->va; - skbn->data[3] = nr->vr; - - if (nr->condition & NR_COND_OWN_RX_BUSY) - skbn->data[4] |= NR_CHOKE_FLAG; - - nr_transmit_buffer(sk, skbn); - - nr->condition &= ~NR_COND_ACK_PENDING; - nr->vl = nr->vr; - - nr_stop_t1timer(sk); -} - -void nr_kick(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - struct sk_buff *skb, *skbn; - unsigned short start, end; - - if (nr->state != NR_STATE_3) - return; - - if (nr->condition & NR_COND_PEER_RX_BUSY) - return; - - if (!skb_peek(&sk->sk_write_queue)) - return; - - start = (skb_peek(&nr->ack_queue) == NULL) ? nr->va : nr->vs; - end = (nr->va + nr->window) % NR_MODULUS; - - if (start == end) - return; - - nr->vs = start; - - /* - * Transmit data until either we're out of data to send or - * the window is full. - */ - - /* - * Dequeue the frame and copy it. - */ - skb = skb_dequeue(&sk->sk_write_queue); - - do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - skb_queue_head(&sk->sk_write_queue, skb); - break; - } - - skb_set_owner_w(skbn, sk); - - /* - * Transmit the frame copy. - */ - nr_send_iframe(sk, skbn); - - nr->vs = (nr->vs + 1) % NR_MODULUS; - - /* - * Requeue the original data frame. 
- */ - skb_queue_tail(&nr->ack_queue, skb); - - } while (nr->vs != end && - (skb = skb_dequeue(&sk->sk_write_queue)) != NULL); - - nr->vl = nr->vr; - nr->condition &= ~NR_COND_ACK_PENDING; - - if (!nr_t1timer_running(sk)) - nr_start_t1timer(sk); -} - -void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) -{ - struct nr_sock *nr = nr_sk(sk); - unsigned char *dptr; - - /* - * Add the protocol byte and network header. - */ - dptr = skb_push(skb, NR_NETWORK_LEN); - - memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] &= ~AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - - memcpy(dptr, &nr->dest_addr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] |= AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - - *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); - - if (!nr_route_frame(skb, NULL)) { - kfree_skb(skb); - nr_disconnect(sk, ENETUNREACH); - } -} - -/* - * The following routines are taken from page 170 of the 7th ARRL Computer - * Networking Conference paper, as is the whole state machine. - */ - -void nr_establish_data_link(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - nr->condition = 0x00; - nr->n2count = 0; - - nr_write_internal(sk, NR_CONNREQ); - - nr_stop_t2timer(sk); - nr_stop_t4timer(sk); - nr_stop_idletimer(sk); - nr_start_t1timer(sk); -} - -/* - * Never send a NAK when we are CHOKEd. 
- */ -void nr_enquiry_response(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - int frametype = NR_INFOACK; - - if (nr->condition & NR_COND_OWN_RX_BUSY) { - frametype |= NR_CHOKE_FLAG; - } else { - if (skb_peek(&nr->reseq_queue) != NULL) - frametype |= NR_NAK_FLAG; - } - - nr_write_internal(sk, frametype); - - nr->vl = nr->vr; - nr->condition &= ~NR_COND_ACK_PENDING; -} - -void nr_check_iframes_acked(struct sock *sk, unsigned short nr) -{ - struct nr_sock *nrom = nr_sk(sk); - - if (nrom->vs == nr) { - nr_frames_acked(sk, nr); - nr_stop_t1timer(sk); - nrom->n2count = 0; - } else { - if (nrom->va != nr) { - nr_frames_acked(sk, nr); - nr_start_t1timer(sk); - } - } -} diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c deleted file mode 100644 index 9cc29ae85b06..000000000000 --- a/net/netrom/nr_route.c +++ /dev/null @@ -1,989 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <net/arp.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <net/netrom.h> -#include <linux/seq_file.h> -#include <linux/export.h> - -static unsigned int nr_neigh_no = 1; - -static HLIST_HEAD(nr_node_list); -static DEFINE_SPINLOCK(nr_node_list_lock); -static HLIST_HEAD(nr_neigh_list); 
-static DEFINE_SPINLOCK(nr_neigh_list_lock); - -static struct nr_node *nr_node_get(ax25_address *callsign) -{ - struct nr_node *found = NULL; - struct nr_node *nr_node; - - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, &nr_node_list) - if (ax25cmp(callsign, &nr_node->callsign) == 0) { - nr_node_hold(nr_node); - found = nr_node; - break; - } - spin_unlock_bh(&nr_node_list_lock); - return found; -} - -static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, - struct net_device *dev) -{ - struct nr_neigh *found = NULL; - struct nr_neigh *nr_neigh; - - spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(nr_neigh, &nr_neigh_list) - if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && - nr_neigh->dev == dev) { - nr_neigh_hold(nr_neigh); - found = nr_neigh; - break; - } - spin_unlock_bh(&nr_neigh_list_lock); - return found; -} - -static void nr_remove_neigh(struct nr_neigh *); - -/* re-sort the routes in quality order. */ -static void re_sort_routes(struct nr_node *nr_node, int x, int y) -{ - if (nr_node->routes[y].quality > nr_node->routes[x].quality) { - if (nr_node->which == x) - nr_node->which = y; - else if (nr_node->which == y) - nr_node->which = x; - - swap(nr_node->routes[x], nr_node->routes[y]); - } -} - -/* - * Add a new route to a node, and in the process add the node and the - * neighbour if it is new. - */ -static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, - ax25_address *ax25, ax25_digi *ax25_digi, struct net_device *dev, - int quality, int obs_count) -{ - struct nr_node *nr_node; - struct nr_neigh *nr_neigh; - int i, found; - struct net_device *odev; - - if ((odev=nr_dev_get(nr)) != NULL) { /* Can't add routes to ourself */ - dev_put(odev); - return -EINVAL; - } - - nr_node = nr_node_get(nr); - - nr_neigh = nr_neigh_get_dev(ax25, dev); - - /* - * The L2 link to a neighbour has failed in the past - * and now a frame comes from this neighbour. 
We assume - * it was a temporary trouble with the link and reset the - * routes now (and not wait for a node broadcast). - */ - if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { - struct nr_node *nr_nodet; - - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_nodet, &nr_node_list) { - nr_node_lock(nr_nodet); - for (i = 0; i < nr_nodet->count; i++) - if (nr_nodet->routes[i].neighbour == nr_neigh) - if (i < nr_nodet->which) - nr_nodet->which = i; - nr_node_unlock(nr_nodet); - } - spin_unlock_bh(&nr_node_list_lock); - } - - if (nr_neigh != NULL) - nr_neigh->failed = 0; - - if (quality == 0 && nr_neigh != NULL && nr_node != NULL) { - nr_neigh_put(nr_neigh); - nr_node_put(nr_node); - return 0; - } - - if (nr_neigh == NULL) { - if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) { - if (nr_node) - nr_node_put(nr_node); - return -ENOMEM; - } - - nr_neigh->callsign = *ax25; - nr_neigh->digipeat = NULL; - nr_neigh->ax25 = NULL; - nr_neigh->dev = dev; - nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); - nr_neigh->locked = 0; - nr_neigh->count = 0; - nr_neigh->number = nr_neigh_no++; - nr_neigh->failed = 0; - refcount_set(&nr_neigh->refcount, 1); - - if (ax25_digi != NULL && ax25_digi->ndigi > 0) { - nr_neigh->digipeat = kmemdup(ax25_digi, - sizeof(*ax25_digi), - GFP_KERNEL); - if (nr_neigh->digipeat == NULL) { - kfree(nr_neigh); - if (nr_node) - nr_node_put(nr_node); - return -ENOMEM; - } - } - - spin_lock_bh(&nr_neigh_list_lock); - hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list); - nr_neigh_hold(nr_neigh); - spin_unlock_bh(&nr_neigh_list_lock); - } - - if (quality != 0 && ax25cmp(nr, ax25) == 0 && !nr_neigh->locked) - nr_neigh->quality = quality; - - if (nr_node == NULL) { - if ((nr_node = kmalloc(sizeof(*nr_node), GFP_ATOMIC)) == NULL) { - if (nr_neigh) - nr_neigh_put(nr_neigh); - return -ENOMEM; - } - - nr_node->callsign = *nr; - strscpy(nr_node->mnemonic, mnemonic); - - nr_node->which = 0; - nr_node->count = 1; - 
refcount_set(&nr_node->refcount, 1); - spin_lock_init(&nr_node->node_lock); - - nr_node->routes[0].quality = quality; - nr_node->routes[0].obs_count = obs_count; - nr_node->routes[0].neighbour = nr_neigh; - - nr_neigh_hold(nr_neigh); - nr_neigh->count++; - - spin_lock_bh(&nr_node_list_lock); - hlist_add_head(&nr_node->node_node, &nr_node_list); - /* refcount initialized at 1 */ - spin_unlock_bh(&nr_node_list_lock); - - nr_neigh_put(nr_neigh); - return 0; - } - nr_node_lock(nr_node); - - if (quality != 0) - strscpy(nr_node->mnemonic, mnemonic); - - for (found = 0, i = 0; i < nr_node->count; i++) { - if (nr_node->routes[i].neighbour == nr_neigh) { - nr_node->routes[i].quality = quality; - nr_node->routes[i].obs_count = obs_count; - found = 1; - break; - } - } - - if (!found) { - /* We have space at the bottom, slot it in */ - if (nr_node->count < 3) { - nr_node->routes[2] = nr_node->routes[1]; - nr_node->routes[1] = nr_node->routes[0]; - - nr_node->routes[0].quality = quality; - nr_node->routes[0].obs_count = obs_count; - nr_node->routes[0].neighbour = nr_neigh; - - nr_node->which++; - nr_node->count++; - nr_neigh_hold(nr_neigh); - nr_neigh->count++; - } else { - /* It must be better than the worst */ - if (quality > nr_node->routes[2].quality) { - nr_node->routes[2].neighbour->count--; - nr_neigh_put(nr_node->routes[2].neighbour); - - if (nr_node->routes[2].neighbour->count == 0 && !nr_node->routes[2].neighbour->locked) - nr_remove_neigh(nr_node->routes[2].neighbour); - - nr_node->routes[2].quality = quality; - nr_node->routes[2].obs_count = obs_count; - nr_node->routes[2].neighbour = nr_neigh; - - nr_neigh_hold(nr_neigh); - nr_neigh->count++; - } - } - } - - /* Now re-sort the routes in quality order */ - switch (nr_node->count) { - case 3: - re_sort_routes(nr_node, 0, 1); - re_sort_routes(nr_node, 1, 2); - fallthrough; - case 2: - re_sort_routes(nr_node, 0, 1); - break; - case 1: - break; - } - - for (i = 0; i < nr_node->count; i++) { - if 
(nr_node->routes[i].neighbour == nr_neigh) { - if (i < nr_node->which) - nr_node->which = i; - break; - } - } - - nr_neigh_put(nr_neigh); - nr_node_unlock(nr_node); - nr_node_put(nr_node); - return 0; -} - -static void nr_remove_node_locked(struct nr_node *nr_node) -{ - lockdep_assert_held(&nr_node_list_lock); - - hlist_del_init(&nr_node->node_node); - nr_node_put(nr_node); -} - -static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh) -{ - hlist_del_init(&nr_neigh->neigh_node); - nr_neigh_put(nr_neigh); -} - -#define nr_remove_neigh_locked(__neigh) \ - __nr_remove_neigh(__neigh) - -static void nr_remove_neigh(struct nr_neigh *nr_neigh) -{ - spin_lock_bh(&nr_neigh_list_lock); - __nr_remove_neigh(nr_neigh); - spin_unlock_bh(&nr_neigh_list_lock); -} - -/* - * "Delete" a node. Strictly speaking remove a route to a node. The node - * is only deleted if no routes are left to it. - */ -static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct net_device *dev) -{ - struct nr_node *nr_node; - struct nr_neigh *nr_neigh; - int i; - - nr_node = nr_node_get(callsign); - - if (nr_node == NULL) - return -EINVAL; - - nr_neigh = nr_neigh_get_dev(neighbour, dev); - - if (nr_neigh == NULL) { - nr_node_put(nr_node); - return -EINVAL; - } - - spin_lock_bh(&nr_node_list_lock); - nr_node_lock(nr_node); - for (i = 0; i < nr_node->count; i++) { - if (nr_node->routes[i].neighbour == nr_neigh) { - nr_neigh->count--; - nr_neigh_put(nr_neigh); - - if (nr_neigh->count == 0 && !nr_neigh->locked) - nr_remove_neigh(nr_neigh); - nr_neigh_put(nr_neigh); - - nr_node->count--; - - if (nr_node->count == 0) { - nr_remove_node_locked(nr_node); - } else { - switch (i) { - case 0: - nr_node->routes[0] = nr_node->routes[1]; - fallthrough; - case 1: - nr_node->routes[1] = nr_node->routes[2]; - fallthrough; - case 2: - break; - } - nr_node_put(nr_node); - } - nr_node_unlock(nr_node); - spin_unlock_bh(&nr_node_list_lock); - - return 0; - } - } - nr_neigh_put(nr_neigh); - 
nr_node_unlock(nr_node); - spin_unlock_bh(&nr_node_list_lock); - nr_node_put(nr_node); - - return -EINVAL; -} - -/* - * Lock a neighbour with a quality. - */ -static int __must_check nr_add_neigh(ax25_address *callsign, - ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) -{ - struct nr_neigh *nr_neigh; - - nr_neigh = nr_neigh_get_dev(callsign, dev); - if (nr_neigh) { - nr_neigh->quality = quality; - nr_neigh->locked = 1; - nr_neigh_put(nr_neigh); - return 0; - } - - if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) - return -ENOMEM; - - nr_neigh->callsign = *callsign; - nr_neigh->digipeat = NULL; - nr_neigh->ax25 = NULL; - nr_neigh->dev = dev; - nr_neigh->quality = quality; - nr_neigh->locked = 1; - nr_neigh->count = 0; - nr_neigh->number = nr_neigh_no++; - nr_neigh->failed = 0; - refcount_set(&nr_neigh->refcount, 1); - - if (ax25_digi != NULL && ax25_digi->ndigi > 0) { - nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi), - GFP_KERNEL); - if (nr_neigh->digipeat == NULL) { - kfree(nr_neigh); - return -ENOMEM; - } - } - - spin_lock_bh(&nr_neigh_list_lock); - hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list); - /* refcount is initialized at 1 */ - spin_unlock_bh(&nr_neigh_list_lock); - - return 0; -} - -/* - * "Delete" a neighbour. The neighbour is only removed if the number - * of nodes that may use it is zero. - */ -static int nr_del_neigh(ax25_address *callsign, struct net_device *dev, unsigned int quality) -{ - struct nr_neigh *nr_neigh; - - nr_neigh = nr_neigh_get_dev(callsign, dev); - - if (nr_neigh == NULL) return -EINVAL; - - nr_neigh->quality = quality; - nr_neigh->locked = 0; - - if (nr_neigh->count == 0) - nr_remove_neigh(nr_neigh); - nr_neigh_put(nr_neigh); - - return 0; -} - -/* - * Decrement the obsolescence count by one. If a route is reduced to a - * count of zero, remove it. Also remove any unlocked neighbours with - * zero nodes routing via it. 
- */ -static int nr_dec_obs(void) -{ - struct nr_neigh *nr_neigh; - struct nr_node *s; - struct hlist_node *nodet; - int i; - - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(s, nodet, &nr_node_list) { - nr_node_lock(s); - for (i = 0; i < s->count; i++) { - switch (s->routes[i].obs_count) { - case 0: /* A locked entry */ - break; - - case 1: /* From 1 -> 0 */ - nr_neigh = s->routes[i].neighbour; - - nr_neigh->count--; - nr_neigh_put(nr_neigh); - - if (nr_neigh->count == 0 && !nr_neigh->locked) - nr_remove_neigh(nr_neigh); - - s->count--; - - switch (i) { - case 0: - s->routes[0] = s->routes[1]; - fallthrough; - case 1: - s->routes[1] = s->routes[2]; - break; - case 2: - break; - } - break; - - default: - s->routes[i].obs_count--; - break; - - } - } - - if (s->count <= 0) - nr_remove_node_locked(s); - nr_node_unlock(s); - } - spin_unlock_bh(&nr_node_list_lock); - - return 0; -} - -/* - * A device has been removed. Remove its routes and neighbours. - */ -void nr_rt_device_down(struct net_device *dev) -{ - struct nr_neigh *s; - struct hlist_node *nodet, *node2t; - struct nr_node *t; - int i; - - spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { - if (s->dev == dev) { - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2t, &nr_node_list) { - nr_node_lock(t); - for (i = 0; i < t->count; i++) { - if (t->routes[i].neighbour == s) { - t->count--; - - switch (i) { - case 0: - t->routes[0] = t->routes[1]; - fallthrough; - case 1: - t->routes[1] = t->routes[2]; - break; - case 2: - break; - } - } - } - - if (t->count <= 0) - nr_remove_node_locked(t); - nr_node_unlock(t); - } - spin_unlock_bh(&nr_node_list_lock); - - nr_remove_neigh_locked(s); - } - } - spin_unlock_bh(&nr_neigh_list_lock); -} - -/* - * Check that the device given is a valid AX.25 interface that is "up". - * Or a valid ethernet interface with an AX.25 callsign binding. 
- */ -static struct net_device *nr_ax25_dev_get(char *devname) -{ - struct net_device *dev; - - if ((dev = dev_get_by_name(&init_net, devname)) == NULL) - return NULL; - - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) - return dev; - - dev_put(dev); - return NULL; -} - -/* - * Find the first active NET/ROM device, usually "nr0". - */ -struct net_device *nr_dev_first(void) -{ - struct net_device *dev, *first = NULL; - - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) - if (first == NULL || strncmp(dev->name, first->name, 3) < 0) - first = dev; - } - dev_hold(first); - rcu_read_unlock(); - - return first; -} - -/* - * Find the NET/ROM device for the given callsign. - */ -struct net_device *nr_dev_get(ax25_address *addr) -{ - struct net_device *dev; - - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && - ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) { - dev_hold(dev); - goto out; - } - } - dev = NULL; -out: - rcu_read_unlock(); - return dev; -} - -static ax25_digi *nr_call_to_digi(ax25_digi *digi, int ndigis, - ax25_address *digipeaters) -{ - int i; - - if (ndigis == 0) - return NULL; - - for (i = 0; i < ndigis; i++) { - digi->calls[i] = digipeaters[i]; - digi->repeated[i] = 0; - } - - digi->ndigi = ndigis; - digi->lastrepeat = -1; - - return digi; -} - -/* - * Handle the ioctls that control the routing functions. 
- */ -int nr_rt_ioctl(unsigned int cmd, void __user *arg) -{ - struct nr_route_struct nr_route; - struct net_device *dev; - ax25_digi digi; - int ret; - - switch (cmd) { - case SIOCADDRT: - if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct))) - return -EFAULT; - if (nr_route.ndigis > AX25_MAX_DIGIS) - return -EINVAL; - if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) - return -EINVAL; - switch (nr_route.type) { - case NETROM_NODE: - if (strnlen(nr_route.mnemonic, 7) == 7) { - ret = -EINVAL; - break; - } - - ret = nr_add_node(&nr_route.callsign, - nr_route.mnemonic, - &nr_route.neighbour, - nr_call_to_digi(&digi, nr_route.ndigis, - nr_route.digipeaters), - dev, nr_route.quality, - nr_route.obs_count); - break; - case NETROM_NEIGH: - ret = nr_add_neigh(&nr_route.callsign, - nr_call_to_digi(&digi, nr_route.ndigis, - nr_route.digipeaters), - dev, nr_route.quality); - break; - default: - ret = -EINVAL; - } - dev_put(dev); - return ret; - - case SIOCDELRT: - if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct))) - return -EFAULT; - if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) - return -EINVAL; - switch (nr_route.type) { - case NETROM_NODE: - ret = nr_del_node(&nr_route.callsign, - &nr_route.neighbour, dev); - break; - case NETROM_NEIGH: - ret = nr_del_neigh(&nr_route.callsign, - dev, nr_route.quality); - break; - default: - ret = -EINVAL; - } - dev_put(dev); - return ret; - - case SIOCNRDECOBS: - return nr_dec_obs(); - - default: - return -EINVAL; - } - - return 0; -} - -/* - * A level 2 link has timed out, therefore it appears to be a poor link, - * then don't use that neighbour until it is reset. 
- */ -void nr_link_failed(ax25_cb *ax25, int reason) -{ - struct nr_neigh *s, *nr_neigh = NULL; - struct nr_node *nr_node = NULL; - - spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, &nr_neigh_list) { - if (s->ax25 == ax25) { - nr_neigh_hold(s); - nr_neigh = s; - break; - } - } - spin_unlock_bh(&nr_neigh_list_lock); - - if (nr_neigh == NULL) - return; - - nr_neigh->ax25 = NULL; - ax25_cb_put(ax25); - - if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { - nr_neigh_put(nr_neigh); - return; - } - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, &nr_node_list) { - nr_node_lock(nr_node); - if (nr_node->which < nr_node->count && - nr_node->routes[nr_node->which].neighbour == nr_neigh) - nr_node->which++; - nr_node_unlock(nr_node); - } - spin_unlock_bh(&nr_node_list_lock); - nr_neigh_put(nr_neigh); -} - -/* - * Route a frame to an appropriate AX.25 connection. A NULL ax25_cb - * indicates an internally generated frame. - */ -int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) -{ - ax25_address *nr_src, *nr_dest; - struct nr_neigh *nr_neigh; - struct nr_node *nr_node; - struct net_device *dev; - unsigned char *dptr; - ax25_cb *ax25s; - int ret; - struct sk_buff *nskb, *oskb; - - /* - * Reject malformed packets early. 
Check that it contains at least 2 - * addresses and 1 byte more for Time-To-Live - */ - if (skb->len < 2 * sizeof(ax25_address) + 1) - return 0; - - nr_src = (ax25_address *)(skb->data + 0); - nr_dest = (ax25_address *)(skb->data + 7); - - if (ax25 != NULL) { - ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, - ax25->ax25_dev->dev, 0, - READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); - if (ret) - return ret; - } - - if ((dev = nr_dev_get(nr_dest)) != NULL) { /* Its for me */ - if (ax25 == NULL) /* Its from me */ - ret = nr_loopback_queue(skb); - else - ret = nr_rx_frame(skb, dev); - dev_put(dev); - return ret; - } - - if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) - return 0; - - /* Its Time-To-Live has expired */ - if (skb->data[14] == 1) { - return 0; - } - - nr_node = nr_node_get(nr_dest); - if (nr_node == NULL) - return 0; - nr_node_lock(nr_node); - - if (nr_node->which >= nr_node->count) { - nr_node_unlock(nr_node); - nr_node_put(nr_node); - return 0; - } - - nr_neigh = nr_node->routes[nr_node->which].neighbour; - - if ((dev = nr_dev_first()) == NULL) { - nr_node_unlock(nr_node); - nr_node_put(nr_node); - return 0; - } - - /* We are going to change the netrom headers so we should get our - own skb, we also did not know until now how much header space - we had to reserve... 
- RXQ */ - nskb = skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC); - - if (!nskb) { - nr_node_unlock(nr_node); - nr_node_put(nr_node); - dev_put(dev); - return 0; - } - oskb = skb; - skb = nskb; - skb->data[14]--; - - dptr = skb_push(skb, 1); - *dptr = AX25_P_NETROM; - - ax25s = nr_neigh->ax25; - nr_neigh->ax25 = ax25_send_frame(skb, 256, - (const ax25_address *)dev->dev_addr, - &nr_neigh->callsign, - nr_neigh->digipeat, nr_neigh->dev); - if (ax25s) - ax25_cb_put(ax25s); - - dev_put(dev); - ret = (nr_neigh->ax25 != NULL); - nr_node_unlock(nr_node); - nr_node_put(nr_node); - - if (ret) - kfree_skb(oskb); - - return ret; -} - -#ifdef CONFIG_PROC_FS - -static void *nr_node_start(struct seq_file *seq, loff_t *pos) - __acquires(&nr_node_list_lock) -{ - spin_lock_bh(&nr_node_list_lock); - return seq_hlist_start_head(&nr_node_list, *pos); -} - -static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &nr_node_list, pos); -} - -static void nr_node_stop(struct seq_file *seq, void *v) - __releases(&nr_node_list_lock) -{ - spin_unlock_bh(&nr_node_list_lock); -} - -static int nr_node_show(struct seq_file *seq, void *v) -{ - char buf[11]; - int i; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, - "callsign mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n"); - else { - struct nr_node *nr_node = hlist_entry(v, struct nr_node, - node_node); - - nr_node_lock(nr_node); - seq_printf(seq, "%-9s %-7s %d %d", - ax2asc(buf, &nr_node->callsign), - (nr_node->mnemonic[0] == '\0') ? 
"*" : nr_node->mnemonic, - nr_node->which + 1, - nr_node->count); - - for (i = 0; i < nr_node->count; i++) { - seq_printf(seq, " %3d %d %05d", - nr_node->routes[i].quality, - nr_node->routes[i].obs_count, - nr_node->routes[i].neighbour->number); - } - nr_node_unlock(nr_node); - - seq_puts(seq, "\n"); - } - return 0; -} - -const struct seq_operations nr_node_seqops = { - .start = nr_node_start, - .next = nr_node_next, - .stop = nr_node_stop, - .show = nr_node_show, -}; - -static void *nr_neigh_start(struct seq_file *seq, loff_t *pos) - __acquires(&nr_neigh_list_lock) -{ - spin_lock_bh(&nr_neigh_list_lock); - return seq_hlist_start_head(&nr_neigh_list, *pos); -} - -static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &nr_neigh_list, pos); -} - -static void nr_neigh_stop(struct seq_file *seq, void *v) - __releases(&nr_neigh_list_lock) -{ - spin_unlock_bh(&nr_neigh_list_lock); -} - -static int nr_neigh_show(struct seq_file *seq, void *v) -{ - char buf[11]; - int i; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "addr callsign dev qual lock count failed digipeaters\n"); - else { - struct nr_neigh *nr_neigh; - - nr_neigh = hlist_entry(v, struct nr_neigh, neigh_node); - seq_printf(seq, "%05d %-9s %-4s %3d %d %3d %3d", - nr_neigh->number, - ax2asc(buf, &nr_neigh->callsign), - nr_neigh->dev ? nr_neigh->dev->name : "???", - nr_neigh->quality, - nr_neigh->locked, - nr_neigh->count, - nr_neigh->failed); - - if (nr_neigh->digipeat != NULL) { - for (i = 0; i < nr_neigh->digipeat->ndigi; i++) - seq_printf(seq, " %s", - ax2asc(buf, &nr_neigh->digipeat->calls[i])); - } - - seq_puts(seq, "\n"); - } - return 0; -} - -const struct seq_operations nr_neigh_seqops = { - .start = nr_neigh_start, - .next = nr_neigh_next, - .stop = nr_neigh_stop, - .show = nr_neigh_show, -}; -#endif - -/* - * Free all memory associated with the nodes and routes lists. 
- */ -void nr_rt_free(void) -{ - struct nr_neigh *s = NULL; - struct nr_node *t = NULL; - struct hlist_node *nodet; - - spin_lock_bh(&nr_neigh_list_lock); - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, nodet, &nr_node_list) { - nr_node_lock(t); - nr_remove_node_locked(t); - nr_node_unlock(t); - } - nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { - while(s->count) { - s->count--; - nr_neigh_put(s); - } - nr_remove_neigh_locked(s); - } - spin_unlock_bh(&nr_node_list_lock); - spin_unlock_bh(&nr_neigh_list_lock); -} diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c deleted file mode 100644 index c3bbd5880850..000000000000 --- a/net/netrom/nr_subr.c +++ /dev/null @@ -1,280 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/netrom.h> - -/* - * This routine purges all of the queues of frames. - */ -void nr_clear_queues(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - skb_queue_purge(&sk->sk_write_queue); - skb_queue_purge(&nr->ack_queue); - skb_queue_purge(&nr->reseq_queue); - skb_queue_purge(&nr->frag_queue); -} - -/* - * This routine purges the input queue of those frames that have been - * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the - * SDL diagram. 
- */ -void nr_frames_acked(struct sock *sk, unsigned short nr) -{ - struct nr_sock *nrom = nr_sk(sk); - struct sk_buff *skb; - - /* - * Remove all the ack-ed frames from the ack queue. - */ - if (nrom->va != nr) { - while (skb_peek(&nrom->ack_queue) != NULL && nrom->va != nr) { - skb = skb_dequeue(&nrom->ack_queue); - kfree_skb(skb); - nrom->va = (nrom->va + 1) % NR_MODULUS; - } - } -} - -/* - * Requeue all the un-ack-ed frames on the output queue to be picked - * up by nr_kick called from the timer. This arrangement handles the - * possibility of an empty output queue. - */ -void nr_requeue_frames(struct sock *sk) -{ - struct sk_buff *skb, *skb_prev = NULL; - - while ((skb = skb_dequeue(&nr_sk(sk)->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&sk->sk_write_queue, skb); - else - skb_append(skb_prev, skb, &sk->sk_write_queue); - skb_prev = skb; - } -} - -/* - * Validate that the value of nr is between va and vs. Return true or - * false for testing. - */ -int nr_validate_nr(struct sock *sk, unsigned short nr) -{ - struct nr_sock *nrom = nr_sk(sk); - unsigned short vc = nrom->va; - - while (vc != nrom->vs) { - if (nr == vc) return 1; - vc = (vc + 1) % NR_MODULUS; - } - - return nr == nrom->vs; -} - -/* - * Check that ns is within the receive window. - */ -int nr_in_rx_window(struct sock *sk, unsigned short ns) -{ - struct nr_sock *nr = nr_sk(sk); - unsigned short vc = nr->vr; - unsigned short vt = (nr->vl + nr->window) % NR_MODULUS; - - while (vc != vt) { - if (ns == vc) return 1; - vc = (vc + 1) % NR_MODULUS; - } - - return 0; -} - -/* - * This routine is called when the HDLC layer internally generates a - * control frame. - */ -void nr_write_internal(struct sock *sk, int frametype) -{ - struct nr_sock *nr = nr_sk(sk); - struct sk_buff *skb; - unsigned char *dptr; - int len, timeout; - - len = NR_TRANSPORT_LEN; - - switch (frametype & 0x0F) { - case NR_CONNREQ: - len += 17; - break; - case NR_CONNACK: - len += (nr->bpqext) ? 
2 : 1; - break; - case NR_DISCREQ: - case NR_DISCACK: - case NR_INFOACK: - break; - default: - printk(KERN_ERR "NET/ROM: nr_write_internal - invalid frame type %d\n", frametype); - return; - } - - skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC); - if (!skb) - return; - - /* - * Space for AX.25 and NET/ROM network header - */ - skb_reserve(skb, NR_NETWORK_LEN); - - dptr = skb_put(skb, len); - - switch (frametype & 0x0F) { - case NR_CONNREQ: - timeout = nr->t1 / HZ; - *dptr++ = nr->my_index; - *dptr++ = nr->my_id; - *dptr++ = 0; - *dptr++ = 0; - *dptr++ = frametype; - *dptr++ = nr->window; - memcpy(dptr, &nr->user_addr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] &= ~AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] &= ~AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - *dptr++ = timeout % 256; - *dptr++ = timeout / 256; - break; - - case NR_CONNACK: - *dptr++ = nr->your_index; - *dptr++ = nr->your_id; - *dptr++ = nr->my_index; - *dptr++ = nr->my_id; - *dptr++ = frametype; - *dptr++ = nr->window; - if (nr->bpqext) - *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); - break; - - case NR_DISCREQ: - case NR_DISCACK: - *dptr++ = nr->your_index; - *dptr++ = nr->your_id; - *dptr++ = 0; - *dptr++ = 0; - *dptr++ = frametype; - break; - - case NR_INFOACK: - *dptr++ = nr->your_index; - *dptr++ = nr->your_id; - *dptr++ = 0; - *dptr++ = nr->vr; - *dptr++ = frametype; - break; - } - - nr_transmit_buffer(sk, skb); -} - -/* - * This routine is called to send an error reply. 
- */ -void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) -{ - struct sk_buff *skbn; - unsigned char *dptr; - int len; - - len = NR_NETWORK_LEN + NR_TRANSPORT_LEN + 1; - - if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skbn, 0); - - dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN); - - skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] &= ~AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - - skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN); - dptr[6] &= ~AX25_CBIT; - dptr[6] |= AX25_EBIT; - dptr[6] |= AX25_SSSID_SPARE; - dptr += AX25_ADDR_LEN; - - *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); - - if (mine) { - *dptr++ = 0; - *dptr++ = 0; - *dptr++ = skb->data[15]; - *dptr++ = skb->data[16]; - } else { - *dptr++ = skb->data[15]; - *dptr++ = skb->data[16]; - *dptr++ = 0; - *dptr++ = 0; - } - - *dptr++ = cmdflags; - *dptr++ = 0; - - if (!nr_route_frame(skbn, NULL)) - kfree_skb(skbn); -} - -void nr_disconnect(struct sock *sk, int reason) -{ - nr_stop_t1timer(sk); - nr_stop_t2timer(sk); - nr_stop_t4timer(sk); - nr_stop_idletimer(sk); - - nr_clear_queues(sk); - - nr_sk(sk)->state = NR_STATE_0; - - sk->sk_state = TCP_CLOSE; - sk->sk_err = reason; - sk->sk_shutdown |= SEND_SHUTDOWN; - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - } -} diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c deleted file mode 100644 index b3a62b1f3a09..000000000000 --- a/net/netrom/nr_timer.c +++ /dev/null @@ -1,249 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/timer.h> -#include 
<linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/netrom.h> - -static void nr_heartbeat_expiry(struct timer_list *); -static void nr_t1timer_expiry(struct timer_list *); -static void nr_t2timer_expiry(struct timer_list *); -static void nr_t4timer_expiry(struct timer_list *); -static void nr_idletimer_expiry(struct timer_list *); - -void nr_init_timers(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); - timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); - timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); - timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); - - /* initialized by sock_init_data */ - sk->sk_timer.function = nr_heartbeat_expiry; -} - -void nr_start_t1timer(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); -} - -void nr_start_t2timer(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); -} - -void nr_start_t4timer(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); -} - -void nr_start_idletimer(struct sock *sk) -{ - struct nr_sock *nr = nr_sk(sk); - - if (nr->idle > 0) - sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); -} - -void nr_start_heartbeat(struct sock *sk) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); -} - -void nr_stop_t1timer(struct sock *sk) -{ - sk_stop_timer(sk, &nr_sk(sk)->t1timer); -} - -void nr_stop_t2timer(struct sock *sk) -{ - sk_stop_timer(sk, &nr_sk(sk)->t2timer); -} - -void nr_stop_t4timer(struct sock *sk) -{ - sk_stop_timer(sk, &nr_sk(sk)->t4timer); -} - -void nr_stop_idletimer(struct sock *sk) 
-{ - sk_stop_timer(sk, &nr_sk(sk)->idletimer); -} - -void nr_stop_heartbeat(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -int nr_t1timer_running(struct sock *sk) -{ - return timer_pending(&nr_sk(sk)->t1timer); -} - -static void nr_heartbeat_expiry(struct timer_list *t) -{ - struct sock *sk = timer_container_of(sk, t, sk_timer); - struct nr_sock *nr = nr_sk(sk); - - bh_lock_sock(sk); - switch (nr->state) { - case NR_STATE_0: - /* Magic here: If we listen() and a new link dies before it - is accepted() it isn't 'dead' so doesn't get removed. */ - if (sock_flag(sk, SOCK_DESTROY) || - (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - if (sk->sk_state == TCP_LISTEN) - sock_hold(sk); - bh_unlock_sock(sk); - nr_destroy_socket(sk); - goto out; - } - break; - - case NR_STATE_3: - /* - * Check for the state of the receive buffer. - */ - if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && - (nr->condition & NR_COND_OWN_RX_BUSY)) { - nr->condition &= ~NR_COND_OWN_RX_BUSY; - nr->condition &= ~NR_COND_ACK_PENDING; - nr->vl = nr->vr; - nr_write_internal(sk, NR_INFOACK); - break; - } - break; - } - - nr_start_heartbeat(sk); - bh_unlock_sock(sk); -out: - sock_put(sk); -} - -static void nr_t2timer_expiry(struct timer_list *t) -{ - struct nr_sock *nr = timer_container_of(nr, t, t2timer); - struct sock *sk = &nr->sock; - - bh_lock_sock(sk); - if (nr->condition & NR_COND_ACK_PENDING) { - nr->condition &= ~NR_COND_ACK_PENDING; - nr_enquiry_response(sk); - } - bh_unlock_sock(sk); - sock_put(sk); -} - -static void nr_t4timer_expiry(struct timer_list *t) -{ - struct nr_sock *nr = timer_container_of(nr, t, t4timer); - struct sock *sk = &nr->sock; - - bh_lock_sock(sk); - nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; - bh_unlock_sock(sk); - sock_put(sk); -} - -static void nr_idletimer_expiry(struct timer_list *t) -{ - struct nr_sock *nr = timer_container_of(nr, t, idletimer); - struct sock *sk = &nr->sock; - - bh_lock_sock(sk); - - nr_clear_queues(sk); - 
- nr->n2count = 0; - nr_write_internal(sk, NR_DISCREQ); - nr->state = NR_STATE_2; - - nr_start_t1timer(sk); - nr_stop_t2timer(sk); - nr_stop_t4timer(sk); - - sk->sk_state = TCP_CLOSE; - sk->sk_err = 0; - sk->sk_shutdown |= SEND_SHUTDOWN; - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - } - bh_unlock_sock(sk); - sock_put(sk); -} - -static void nr_t1timer_expiry(struct timer_list *t) -{ - struct nr_sock *nr = timer_container_of(nr, t, t1timer); - struct sock *sk = &nr->sock; - - bh_lock_sock(sk); - switch (nr->state) { - case NR_STATE_1: - if (nr->n2count == nr->n2) { - nr_disconnect(sk, ETIMEDOUT); - goto out; - } else { - nr->n2count++; - nr_write_internal(sk, NR_CONNREQ); - } - break; - - case NR_STATE_2: - if (nr->n2count == nr->n2) { - nr_disconnect(sk, ETIMEDOUT); - goto out; - } else { - nr->n2count++; - nr_write_internal(sk, NR_DISCREQ); - } - break; - - case NR_STATE_3: - if (nr->n2count == nr->n2) { - nr_disconnect(sk, ETIMEDOUT); - goto out; - } else { - nr->n2count++; - nr_requeue_frames(sk); - } - break; - } - - nr_start_t1timer(sk); -out: - bh_unlock_sock(sk); - sock_put(sk); -} diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c deleted file mode 100644 index 7dc0fa628f2e..000000000000 --- a/net/netrom/sysctl_net_netrom.c +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) - */ -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/init.h> -#include <net/ax25.h> -#include <net/netrom.h> - -/* - * Values taken from NET/ROM documentation. 
- */ -static int min_quality[] = {0}, max_quality[] = {255}; -static int min_obs[] = {0}, max_obs[] = {255}; -static int min_ttl[] = {0}, max_ttl[] = {255}; -static int min_t1[] = {5 * HZ}; -static int max_t1[] = {600 * HZ}; -static int min_n2[] = {2}, max_n2[] = {127}; -static int min_t2[] = {1 * HZ}; -static int max_t2[] = {60 * HZ}; -static int min_t4[] = {1 * HZ}; -static int max_t4[] = {1000 * HZ}; -static int min_window[] = {1}, max_window[] = {127}; -static int min_idle[] = {0 * HZ}; -static int max_idle[] = {65535 * HZ}; -static int min_route[] = {0}, max_route[] = {1}; -static int min_fails[] = {1}, max_fails[] = {10}; -static int min_reset[] = {0}, max_reset[] = {1}; - -static struct ctl_table_header *nr_table_header; - -static struct ctl_table nr_table[] = { - { - .procname = "default_path_quality", - .data = &sysctl_netrom_default_path_quality, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_quality, - .extra2 = &max_quality - }, - { - .procname = "obsolescence_count_initialiser", - .data = &sysctl_netrom_obsolescence_count_initialiser, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_obs, - .extra2 = &max_obs - }, - { - .procname = "network_ttl_initialiser", - .data = &sysctl_netrom_network_ttl_initialiser, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_ttl, - .extra2 = &max_ttl - }, - { - .procname = "transport_timeout", - .data = &sysctl_netrom_transport_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t1, - .extra2 = &max_t1 - }, - { - .procname = "transport_maximum_tries", - .data = &sysctl_netrom_transport_maximum_tries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_n2, - .extra2 = &max_n2 - }, - { - .procname = "transport_acknowledge_delay", - .data = 
&sysctl_netrom_transport_acknowledge_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t2, - .extra2 = &max_t2 - }, - { - .procname = "transport_busy_delay", - .data = &sysctl_netrom_transport_busy_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t4, - .extra2 = &max_t4 - }, - { - .procname = "transport_requested_window_size", - .data = &sysctl_netrom_transport_requested_window_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_window, - .extra2 = &max_window - }, - { - .procname = "transport_no_activity_timeout", - .data = &sysctl_netrom_transport_no_activity_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_idle, - .extra2 = &max_idle - }, - { - .procname = "routing_control", - .data = &sysctl_netrom_routing_control, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_route, - .extra2 = &max_route - }, - { - .procname = "link_fails_count", - .data = &sysctl_netrom_link_fails_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_fails, - .extra2 = &max_fails - }, - { - .procname = "reset", - .data = &sysctl_netrom_reset_circuit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_reset, - .extra2 = &max_reset - }, -}; - -int __init nr_register_sysctl(void) -{ - nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); - if (!nr_table_header) - return -ENOMEM; - return 0; -} - -void nr_unregister_sysctl(void) -{ - unregister_net_sysctl_table(nr_table_header); -} diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e209099218b4..bbbde50fc649 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2184,9 +2184,40 @@ error: return err; } +static size_t 
ovs_vport_cmd_msg_size(void) +{ + size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); + + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_PORT_NO */ + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_TYPE */ + msgsize += nla_total_size(IFNAMSIZ); /* OVS_VPORT_ATTR_NAME */ + msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_IFINDEX */ + msgsize += nla_total_size(sizeof(s32)); /* OVS_VPORT_ATTR_NETNSID */ + + /* OVS_VPORT_ATTR_STATS */ + msgsize += nla_total_size_64bit(sizeof(struct ovs_vport_stats)); + + /* OVS_VPORT_ATTR_UPCALL_STATS(OVS_VPORT_UPCALL_ATTR_SUCCESS + + * OVS_VPORT_UPCALL_ATTR_FAIL) + */ + msgsize += nla_total_size(nla_total_size_64bit(sizeof(u64)) + + nla_total_size_64bit(sizeof(u64))); + + /* OVS_VPORT_ATTR_UPCALL_PID */ + msgsize += nla_total_size(nr_cpu_ids * sizeof(u32)); + + /* OVS_VPORT_ATTR_OPTIONS(OVS_TUNNEL_ATTR_DST_PORT + + * OVS_TUNNEL_ATTR_EXTENSION(OVS_VXLAN_EXT_GBP)) + */ + msgsize += nla_total_size(nla_total_size(sizeof(u16)) + + nla_total_size(nla_total_size(0))); + + return msgsize; +} + static struct sk_buff *ovs_vport_cmd_alloc_info(void) { - return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + return genlmsg_new(ovs_vport_cmd_msg_size(), GFP_KERNEL); } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). 
*/ @@ -2196,7 +2227,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, struct sk_buff *skb; int retval; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = ovs_vport_cmd_alloc_info(); if (!skb) return ERR_PTR(-ENOMEM); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index b10e1602c6b1..cb5ea4424ffc 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -97,6 +97,9 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -111,7 +114,7 @@ static struct vport *geneve_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_geneve_vport_ops = { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4014c9b5eb79..6cb5a697b396 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -63,6 +63,9 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_PTR(err); } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; } @@ -75,7 +78,7 @@ static struct vport *gre_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_gre_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 12055af832dc..c42642075685 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -73,37 +73,21 @@ static struct net_device *get_dpdev(const struct datapath *dp) return local->dev; } -struct vport *ovs_netdev_link(struct vport *vport, const char *name) +struct vport *ovs_netdev_link(struct vport *vport, 
bool tunnel) { int err; - vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name); - if (!vport->dev) { + if (WARN_ON_ONCE(!vport->dev)) { err = -ENODEV; goto error_free_vport; } - /* Ensure that the device exists and that the provided - * name is not one of its aliases. - */ - if (strcmp(name, ovs_vport_name(vport))) { - err = -ENODEV; - goto error_put; - } - netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); - if (vport->dev->flags & IFF_LOOPBACK || - (vport->dev->type != ARPHRD_ETHER && - vport->dev->type != ARPHRD_NONE) || - ovs_is_internal_dev(vport->dev)) { - err = -EINVAL; - goto error_put; - } rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, get_dpdev(vport->dp), NULL, NULL, NULL); if (err) - goto error_unlock; + goto error_put_unlock; err = netdev_rx_handler_register(vport->dev, netdev_frame_hook, vport); @@ -119,10 +103,11 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) error_master_upper_dev_unlink: netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp)); -error_unlock: - rtnl_unlock(); -error_put: +error_put_unlock: + if (tunnel && vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev, 0, NULL); netdev_put(vport->dev, &vport->dev_tracker); + rtnl_unlock(); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -132,12 +117,39 @@ EXPORT_SYMBOL_GPL(ovs_netdev_link); static struct vport *netdev_create(const struct vport_parms *parms) { struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms); if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); + if (!vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); + + /* Ensure that the provided name is not an alias. 
*/ + if (strcmp(parms->name, ovs_vport_name(vport))) { + err = -ENODEV; + goto error_put; + } + + if (vport->dev->flags & IFF_LOOPBACK || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || + ovs_is_internal_dev(vport->dev)) { + err = -EINVAL; + goto error_put; + } + + return ovs_netdev_link(vport, false); +error_put: + netdev_put(vport->dev, &vport->dev_tracker); +error_free_vport: + ovs_vport_free(vport); + return ERR_PTR(err); } static void vport_netdev_free(struct rcu_head *rcu) @@ -196,9 +208,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - rtnl_unlock(); + /* We can't put the device reference yet, since it can still be in + * use, but rtnl_unlock()->netdev_run_todo() will block until all + * the references are released, so the RCU call must be before it. + */ call_rcu(&vport->rcu, vport_netdev_free); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index c5d83a43bfc4..6c0d7366f986 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -13,7 +13,7 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); -struct vport *ovs_netdev_link(struct vport *vport, const char *name); +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 0b881b043bcf..c1b37b50d29e 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -126,6 +126,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -140,7 +143,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms) if 
(IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_vxlan_netdev_vport_ops = { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 23f629e94a36..56b2e2d1a749 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -406,6 +406,9 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) return -EINVAL; + if (nla_len(ids) / sizeof(u32) > nr_cpu_ids) + return -EINVAL; + old = ovsl_dereference(vport->upcall_portids); vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4b043241fd56..8e6f3a734ba0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2718,7 +2718,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb = NULL; struct net_device *dev; - struct virtio_net_hdr *vnet_hdr = NULL; + struct virtio_net_hdr vnet_hdr; + bool has_vnet_hdr = false; struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; @@ -2819,16 +2820,20 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; if (vnet_hdr_sz) { - vnet_hdr = data; data += vnet_hdr_sz; tp_len -= vnet_hdr_sz; - if (tp_len < 0 || - __packet_snd_vnet_parse(vnet_hdr, tp_len)) { + if (tp_len < 0) { + tp_len = -EINVAL; + goto tpacket_error; + } + memcpy(&vnet_hdr, data - vnet_hdr_sz, sizeof(vnet_hdr)); + if (__packet_snd_vnet_parse(&vnet_hdr, tp_len)) { tp_len = -EINVAL; goto tpacket_error; } copylen = __virtio16_to_cpu(vio_le(), - vnet_hdr->hdr_len); + vnet_hdr.hdr_len); + has_vnet_hdr = true; } copylen = max_t(int, copylen, dev->hard_header_len); skb = sock_alloc_send_skb(&po->sk, @@ -2865,12 +2870,12 @@ tpacket_error: } } - if (vnet_hdr_sz) { - if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { + if (has_vnet_hdr) 
{ + if (virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; } - virtio_net_hdr_set_proto(skb, vnet_hdr); + virtio_net_hdr_set_proto(skb, &vnet_hdr); } skb->destructor = tpacket_destruct_skb; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c4af26357144..631a99cdbd00 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -208,9 +208,15 @@ static int pn_socket_autobind(struct socket *sock) sa.spn_family = AF_PHONET; err = pn_socket_bind(sock, (struct sockaddr_unsized *)&sa, sizeof(struct sockaddr_pn)); - if (err != -EINVAL) + /* + * pn_socket_bind() also returns -EINVAL when sk_state != TCP_CLOSE + * without a prior bind, so -EINVAL alone is not sufficient to infer + * that the socket was already bound. Only treat it as "already + * bound" when the port is non-zero; otherwise propagate the error + * instead of crashing the kernel. + */ + if (err != -EINVAL || unlikely(!pn_port(pn_sk(sock->sk)->sobject))) return err; - BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); return 0; /* socket was already bound */ } diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c index 22a48d0fa378..953309952cef 100644 --- a/net/psp/psp-nl-gen.c +++ b/net/psp/psp-nl-gen.c @@ -76,7 +76,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_dev_set_nl_policy, .maxattr = PSP_A_DEV_PSP_VERSIONS_ENA, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_KEY_ROTATE, @@ -85,7 +85,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_key_rotate_nl_policy, .maxattr = PSP_A_DEV_ID, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_RX_ASSOC, diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 9508b6c38003..e45549f08eef 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -263,15 +263,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate); /* Receive handler for PSP 
packets. * - * Presently it accepts only already-authenticated packets and does not - * support optional fields, such as virtualization cookies. The caller should - * ensure that skb->data is pointing to the mac header, and that skb->mac_len - * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE - * is not supported). + * Accepts only already-authenticated packets. The full PSP header is + * stripped according to psph->hdrlen; any optional fields it advertises + * (virtualization cookies, etc.) are ignored and discarded along with the + * rest of the header. The caller should ensure that skb->data is pointing + * to the mac header, and that skb->mac_len is set. This function does not + * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported). */ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) { - int l2_hlen = 0, l3_hlen, encap; + int l2_hlen = 0, l3_hlen, encap, psp_hlen; struct psp_skb_ext *pse; struct psphdr *psph; struct ethhdr *eth; @@ -312,18 +313,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT))) return -EINVAL; - pse = skb_ext_add(skb, SKB_EXT_PSP); - if (!pse) + psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + + sizeof(struct udphdr)); + + /* Strip the full PSP header per psph->hdrlen; VC/options are pulled + * into the linear region only so they can be discarded with the + * rest of the header. 
+ */ + psp_hlen = (psph->hdrlen + 1) * 8; + + if (unlikely(psp_hlen < sizeof(struct psphdr))) + return -EINVAL; + + if (psp_hlen > sizeof(struct psphdr) && + !pskb_may_pull(skb, l2_hlen + l3_hlen + + sizeof(struct udphdr) + psp_hlen)) return -EINVAL; psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + sizeof(struct udphdr)); + + pse = skb_ext_add(skb, SKB_EXT_PSP); + if (!pse) + return -EINVAL; + pse->spi = psph->spi; pse->dev_id = dev_id; pse->generation = generation; pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl); - encap = PSP_ENCAP_HLEN; + encap = sizeof(struct udphdr) + psp_hlen; encap += strip_icv ? PSP_TRL_SIZE : 0; if (proto == htons(ETH_P_IP)) { @@ -340,8 +359,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap); } - memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen); - skb_pull(skb, PSP_ENCAP_HLEN); + memmove(skb->data + sizeof(struct udphdr) + psp_hlen, + skb->data, l2_hlen + l3_hlen); + skb_pull(skb, sizeof(struct udphdr) + psp_hlen); if (strip_icv) pskb_trim(skb, skb->len - PSP_TRL_SIZE); diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c index 6afd7707ec12..0cc744a6e1c9 100644 --- a/net/psp/psp_nl.c +++ b/net/psp/psp_nl.c @@ -305,8 +305,13 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, psd = psp_dev_get_for_sock(socket->sk); if (psd) { - err = psp_dev_check_access(psd, genl_info_net(info)); - if (err) { + /* Extra care needed here, psp_dev_get_for_sock() only gives + * us access to struct psp_dev's memory, which is quite weak. 
+ */ + mutex_lock(&psd->lock); + if (!psp_dev_is_registered(psd) || + psp_dev_check_access(psd, genl_info_net(info))) { + mutex_unlock(&psd->lock); psp_dev_put(psd); psd = NULL; } @@ -319,7 +324,6 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, id = info->attrs[PSP_A_ASSOC_DEV_ID]; if (psd) { - mutex_lock(&psd->lock); if (id && psd->id != nla_get_u32(id)) { mutex_unlock(&psd->lock); NL_SET_ERR_MSG_ATTR(info->extack, id, diff --git a/net/rds/connection.c b/net/rds/connection.c index 412441aaa298..c10b7ed06c49 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -701,6 +701,13 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { + /* Zero the per-item buffer before handing it to the + * visitor so any field the visitor does not write - + * including implicit alignment padding - cannot leak + * stack contents to user space via rds_info_copy(). + */ + memset(buffer, 0, item_len); + /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) continue; @@ -750,6 +757,13 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, */ cp = conn->c_path; + /* Zero the per-item buffer for the same reason as + * rds_for_each_conn_info(): any byte the visitor + * does not write (including alignment padding) must + * not leak stack contents via rds_info_copy(). + */ + memset(buffer, 0, item_len); + /* XXX no cp_lock usage.. 
*/ if (!visitor(cp, buffer)) continue; diff --git a/net/rds/message.c b/net/rds/message.c index eaa6f22601a4..7feb0eb6537d 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -131,24 +131,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, */ static void rds_message_purge(struct rds_message *rm) { + struct rds_znotifier *znotifier; unsigned long i, flags; - bool zcopy = false; + bool zcopy; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); + znotifier = rm->data.op_mmp_znotifier; + rm->data.op_mmp_znotifier = NULL; + zcopy = !!znotifier; + if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; - if (rm->data.op_mmp_znotifier) { - zcopy = true; - rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + if (znotifier) { + rds_rm_zerocopy_callback(rs, znotifier); rds_wake_sk_sleep(rs); - rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; + } else if (znotifier) { + /* + * Zerocopy can fail before the message is queued on the + * socket, so there is no rs to carry the notification. 
+ */ + mm_unaccount_pinned_pages(&znotifier->z_mmp); + kfree(rds_info_from_znotifier(znotifier)); } spin_unlock_irqrestore(&rm->m_rs_lock, flags); @@ -438,6 +448,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * for (i = 0; i < rm->data.op_nents; i++) put_page(sg_page(&rm->data.op_sg[i])); + rm->data.op_nents = 0; mmp = &rm->data.op_mmp_znotifier->z_mmp; mm_unaccount_pinned_pages(mmp); ret = -EFAULT; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index aa6465dc742c..61fb6e45281b 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -326,10 +326,6 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { - if (!need_odp) { - unpin_user_pages(pages, nr_pages); - kfree(sg); - } ret = -EFAULT; goto out; } diff --git a/net/rose/Makefile b/net/rose/Makefile deleted file mode 100644 index 3e6638f5ba57..000000000000 --- a/net/rose/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the Linux Rose (X.25 PLP) layer. 
-# - -obj-$(CONFIG_ROSE) += rose.o - -rose-y := af_rose.o rose_dev.o rose_in.o rose_link.o rose_loopback.o \ - rose_out.o rose_route.o rose_subr.o rose_timer.o -rose-$(CONFIG_SYSCTL) += sysctl_net_rose.o diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c deleted file mode 100644 index d5032840ee48..000000000000 --- a/net/rose/af_rose.c +++ /dev/null @@ -1,1687 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) - * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) - * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) - */ - -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/slab.h> -#include <linux/kernel.h> -#include <linux/sched/signal.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/stat.h> -#include <net/net_namespace.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <net/rose.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <net/tcp_states.h> -#include <net/ip.h> -#include <net/arp.h> - -static int rose_ndevs = 10; - -int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0; -int sysctl_rose_call_request_timeout = ROSE_DEFAULT_T1; -int sysctl_rose_reset_request_timeout = ROSE_DEFAULT_T2; -int sysctl_rose_clear_request_timeout = ROSE_DEFAULT_T3; -int sysctl_rose_no_activity_timeout = ROSE_DEFAULT_IDLE; -int 
sysctl_rose_ack_hold_back_timeout = ROSE_DEFAULT_HB; -int sysctl_rose_routing_control = ROSE_DEFAULT_ROUTING; -int sysctl_rose_link_fail_timeout = ROSE_DEFAULT_FAIL_TIMEOUT; -int sysctl_rose_maximum_vcs = ROSE_DEFAULT_MAXVC; -int sysctl_rose_window_size = ROSE_DEFAULT_WINDOW_SIZE; - -static HLIST_HEAD(rose_list); -static DEFINE_SPINLOCK(rose_list_lock); - -static const struct proto_ops rose_proto_ops; - -ax25_address rose_callsign; - -/* - * ROSE network devices are virtual network devices encapsulating ROSE - * frames into AX.25 which will be sent through an AX.25 device, so form a - * special "super class" of normal net devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key rose_netdev_xmit_lock_key; -static struct lock_class_key rose_netdev_addr_lock_key; - -static void rose_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); -} - -static void rose_set_lockdep_key(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); -} - -/* - * Convert a ROSE address into text. - */ -char *rose2asc(char *buf, const rose_address *addr) -{ - if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 && - addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 && - addr->rose_addr[4] == 0x00) { - strcpy(buf, "*"); - } else { - sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, - addr->rose_addr[1] & 0xFF, - addr->rose_addr[2] & 0xFF, - addr->rose_addr[3] & 0xFF, - addr->rose_addr[4] & 0xFF); - } - - return buf; -} - -/* - * Compare two ROSE addresses, 0 == equal. 
- */ -int rosecmp(const rose_address *addr1, const rose_address *addr2) -{ - int i; - - for (i = 0; i < 5; i++) - if (addr1->rose_addr[i] != addr2->rose_addr[i]) - return 1; - - return 0; -} - -/* - * Compare two ROSE addresses for only mask digits, 0 == equal. - */ -int rosecmpm(const rose_address *addr1, const rose_address *addr2, - unsigned short mask) -{ - unsigned int i, j; - - if (mask > 10) - return 1; - - for (i = 0; i < mask; i++) { - j = i / 2; - - if ((i % 2) != 0) { - if ((addr1->rose_addr[j] & 0x0F) != (addr2->rose_addr[j] & 0x0F)) - return 1; - } else { - if ((addr1->rose_addr[j] & 0xF0) != (addr2->rose_addr[j] & 0xF0)) - return 1; - } - } - - return 0; -} - -/* - * Socket removal during an interrupt is now safe. - */ -static void rose_remove_socket(struct sock *sk) -{ - spin_lock_bh(&rose_list_lock); - sk_del_node_init(sk); - spin_unlock_bh(&rose_list_lock); -} - -/* - * Kill all bound sockets on a broken link layer connection to a - * particular neighbour. - */ -void rose_kill_by_neigh(struct rose_neigh *neigh) -{ - struct sock *s; - - spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); - - if (rose->neighbour == neigh) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - rose_neigh_put(rose->neighbour); - rose->neighbour = NULL; - } - } - spin_unlock_bh(&rose_list_lock); -} - -/* - * Kill all bound sockets on a dropped device. 
- */ -static void rose_kill_by_device(struct net_device *dev) -{ - struct sock *sk, *array[16]; - struct rose_sock *rose; - bool rescan; - int i, cnt; - -start: - rescan = false; - cnt = 0; - spin_lock_bh(&rose_list_lock); - sk_for_each(sk, &rose_list) { - rose = rose_sk(sk); - if (rose->device == dev) { - if (cnt == ARRAY_SIZE(array)) { - rescan = true; - break; - } - sock_hold(sk); - array[cnt++] = sk; - } - } - spin_unlock_bh(&rose_list_lock); - - for (i = 0; i < cnt; i++) { - sk = array[i]; - rose = rose_sk(sk); - lock_sock(sk); - spin_lock_bh(&rose_list_lock); - if (rose->device == dev) { - rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - if (rose->neighbour) - rose_neigh_put(rose->neighbour); - netdev_put(rose->device, &rose->dev_tracker); - rose->device = NULL; - } - spin_unlock_bh(&rose_list_lock); - release_sock(sk); - sock_put(sk); - cond_resched(); - } - if (rescan) - goto start; -} - -/* - * Handle device status changes. - */ -static int rose_device_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - if (event != NETDEV_DOWN) - return NOTIFY_DONE; - - switch (dev->type) { - case ARPHRD_ROSE: - rose_kill_by_device(dev); - break; - case ARPHRD_AX25: - rose_link_device_down(dev); - rose_rt_device_down(dev); - break; - } - - return NOTIFY_DONE; -} - -/* - * Add a socket to the bound sockets list. - */ -static void rose_insert_socket(struct sock *sk) -{ - - spin_lock_bh(&rose_list_lock); - sk_add_node(sk, &rose_list); - spin_unlock_bh(&rose_list_lock); -} - -/* - * Find a socket that wants to accept the Call Request we just - * received. 
- */ -static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) -{ - struct sock *s; - - spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); - - if (!rosecmp(&rose->source_addr, addr) && - !ax25cmp(&rose->source_call, call) && - !rose->source_ndigis && s->sk_state == TCP_LISTEN) - goto found; - } - - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); - - if (!rosecmp(&rose->source_addr, addr) && - !ax25cmp(&rose->source_call, &null_ax25_address) && - s->sk_state == TCP_LISTEN) - goto found; - } - s = NULL; -found: - spin_unlock_bh(&rose_list_lock); - return s; -} - -/* - * Find a connected ROSE socket given my LCI and device. - */ -struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) -{ - struct sock *s; - - spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); - - if (rose->lci == lci && rose->neighbour == neigh) - goto found; - } - s = NULL; -found: - spin_unlock_bh(&rose_list_lock); - return s; -} - -/* - * Find a unique LCI for a given device. - */ -unsigned int rose_new_lci(struct rose_neigh *neigh) -{ - int lci; - - if (neigh->dce_mode) { - for (lci = 1; lci <= sysctl_rose_maximum_vcs; lci++) - if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL) - return lci; - } else { - for (lci = sysctl_rose_maximum_vcs; lci > 0; lci--) - if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL) - return lci; - } - - return 0; -} - -/* - * Deferred destroy. - */ -void rose_destroy_socket(struct sock *); - -/* - * Handler for deferred kills. - */ -static void rose_destroy_timer(struct timer_list *t) -{ - struct sock *sk = timer_container_of(sk, t, sk_timer); - - rose_destroy_socket(sk); -} - -/* - * This is called from user mode and the timers. Thus it protects itself - * against interrupt users but doesn't worry about being called during - * work. 
Once it is removed from the queue no interrupt or bottom half - * will touch it and we are (fairly 8-) ) safe. - */ -void rose_destroy_socket(struct sock *sk) -{ - struct sk_buff *skb; - - rose_remove_socket(sk); - rose_stop_heartbeat(sk); - rose_stop_idletimer(sk); - rose_stop_timer(sk); - - rose_clear_queues(sk); /* Flush the queues */ - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (skb->sk != sk) { /* A pending connection */ - /* Queue the unaccepted socket for death */ - sock_set_flag(skb->sk, SOCK_DEAD); - rose_start_heartbeat(skb->sk); - rose_sk(skb->sk)->state = ROSE_STATE_0; - } - - kfree_skb(skb); - } - - if (sk_has_allocations(sk)) { - /* Defer: outstanding buffers */ - timer_setup(&sk->sk_timer, rose_destroy_timer, 0); - sk->sk_timer.expires = jiffies + 10 * HZ; - add_timer(&sk->sk_timer); - } else - sock_put(sk); -} - -/* - * Handling for system calls applied via the various interfaces to a - * ROSE socket object. - */ - -static int rose_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - unsigned int opt; - - if (level != SOL_ROSE) - return -ENOPROTOOPT; - - if (optlen < sizeof(unsigned int)) - return -EINVAL; - - if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) - return -EFAULT; - - switch (optname) { - case ROSE_DEFER: - rose->defer = opt ? 
1 : 0; - return 0; - - case ROSE_T1: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - rose->t1 = opt * HZ; - return 0; - - case ROSE_T2: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - rose->t2 = opt * HZ; - return 0; - - case ROSE_T3: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - rose->t3 = opt * HZ; - return 0; - - case ROSE_HOLDBACK: - if (opt < 1 || opt > UINT_MAX / HZ) - return -EINVAL; - rose->hb = opt * HZ; - return 0; - - case ROSE_IDLE: - if (opt > UINT_MAX / (60 * HZ)) - return -EINVAL; - rose->idle = opt * 60 * HZ; - return 0; - - case ROSE_QBITINCL: - rose->qbitincl = opt ? 1 : 0; - return 0; - - default: - return -ENOPROTOOPT; - } -} - -static int rose_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - int val = 0; - int len; - - if (level != SOL_ROSE) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - if (len < 0) - return -EINVAL; - - switch (optname) { - case ROSE_DEFER: - val = rose->defer; - break; - - case ROSE_T1: - val = rose->t1 / HZ; - break; - - case ROSE_T2: - val = rose->t2 / HZ; - break; - - case ROSE_T3: - val = rose->t3 / HZ; - break; - - case ROSE_HOLDBACK: - val = rose->hb / HZ; - break; - - case ROSE_IDLE: - val = rose->idle / (60 * HZ); - break; - - case ROSE_QBITINCL: - val = rose->qbitincl; - break; - - default: - return -ENOPROTOOPT; - } - - len = min_t(unsigned int, len, sizeof(int)); - - if (put_user(len, optlen)) - return -EFAULT; - - return copy_to_user(optval, &val, len) ? 
-EFAULT : 0; -} - -static int rose_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - - lock_sock(sk); - if (sock->state != SS_UNCONNECTED) { - release_sock(sk); - return -EINVAL; - } - - if (sk->sk_state != TCP_LISTEN) { - struct rose_sock *rose = rose_sk(sk); - - rose->dest_ndigis = 0; - memset(&rose->dest_addr, 0, ROSE_ADDR_LEN); - memset(&rose->dest_call, 0, AX25_ADDR_LEN); - memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS); - sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; - release_sock(sk); - return 0; - } - release_sock(sk); - - return -EOPNOTSUPP; -} - -static struct proto rose_proto = { - .name = "ROSE", - .owner = THIS_MODULE, - .obj_size = sizeof(struct rose_sock), -}; - -static int rose_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - struct rose_sock *rose; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - if (sock->type != SOCK_SEQPACKET || protocol != 0) - return -ESOCKTNOSUPPORT; - - sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); - if (sk == NULL) - return -ENOMEM; - - rose = rose_sk(sk); - - sock_init_data(sock, sk); - - skb_queue_head_init(&rose->ack_queue); -#ifdef M_BIT - skb_queue_head_init(&rose->frag_queue); - rose->fraglen = 0; -#endif - - sock->ops = &rose_proto_ops; - sk->sk_protocol = protocol; - - timer_setup(&rose->timer, NULL, 0); - timer_setup(&rose->idletimer, NULL, 0); - - rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); - rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); - rose->t3 = msecs_to_jiffies(sysctl_rose_clear_request_timeout); - rose->hb = msecs_to_jiffies(sysctl_rose_ack_hold_back_timeout); - rose->idle = msecs_to_jiffies(sysctl_rose_no_activity_timeout); - - rose->state = ROSE_STATE_0; - - return 0; -} - -static struct sock *rose_make_new(struct sock *osk) -{ - struct sock *sk; - struct rose_sock *rose, *orose; - - if (osk->sk_type != SOCK_SEQPACKET) - return NULL; - - sk = 
sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); - if (sk == NULL) - return NULL; - - rose = rose_sk(sk); - - sock_init_data(NULL, sk); - - skb_queue_head_init(&rose->ack_queue); -#ifdef M_BIT - skb_queue_head_init(&rose->frag_queue); - rose->fraglen = 0; -#endif - - sk->sk_type = osk->sk_type; - sk->sk_priority = READ_ONCE(osk->sk_priority); - sk->sk_protocol = osk->sk_protocol; - sk->sk_rcvbuf = osk->sk_rcvbuf; - sk->sk_sndbuf = osk->sk_sndbuf; - sk->sk_state = TCP_ESTABLISHED; - sock_copy_flags(sk, osk); - - timer_setup(&rose->timer, NULL, 0); - timer_setup(&rose->idletimer, NULL, 0); - - orose = rose_sk(osk); - rose->t1 = orose->t1; - rose->t2 = orose->t2; - rose->t3 = orose->t3; - rose->hb = orose->hb; - rose->idle = orose->idle; - rose->defer = orose->defer; - rose->device = orose->device; - if (rose->device) - netdev_hold(rose->device, &rose->dev_tracker, GFP_ATOMIC); - rose->qbitincl = orose->qbitincl; - - return sk; -} - -static int rose_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose; - - if (sk == NULL) return 0; - - sock_hold(sk); - sock_orphan(sk); - lock_sock(sk); - rose = rose_sk(sk); - - switch (rose->state) { - case ROSE_STATE_0: - release_sock(sk); - rose_disconnect(sk, 0, -1, -1); - lock_sock(sk); - rose_destroy_socket(sk); - break; - - case ROSE_STATE_2: - rose_neigh_put(rose->neighbour); - release_sock(sk); - rose_disconnect(sk, 0, -1, -1); - lock_sock(sk); - rose_destroy_socket(sk); - break; - - case ROSE_STATE_1: - case ROSE_STATE_3: - case ROSE_STATE_4: - case ROSE_STATE_5: - rose_clear_queues(sk); - rose_stop_idletimer(sk); - rose_write_internal(sk, ROSE_CLEAR_REQUEST); - rose_start_t3timer(sk); - rose->state = ROSE_STATE_2; - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - sock_set_flag(sk, SOCK_DESTROY); - break; - - default: - break; - } - - spin_lock_bh(&rose_list_lock); - netdev_put(rose->device, 
&rose->dev_tracker); - rose->device = NULL; - spin_unlock_bh(&rose_list_lock); - sock->sk = NULL; - release_sock(sk); - sock_put(sk); - - return 0; -} - -static int rose_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; - struct net_device *dev; - ax25_address *source; - ax25_uid_assoc *user; - int err = -EINVAL; - int n; - - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) - return -EINVAL; - - if (addr->srose_family != AF_ROSE) - return -EINVAL; - - if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) - return -EINVAL; - - if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) - return -EINVAL; - - lock_sock(sk); - - if (!sock_flag(sk, SOCK_ZAPPED)) - goto out_release; - - err = -EADDRNOTAVAIL; - dev = rose_dev_get(&addr->srose_addr); - if (!dev) - goto out_release; - - source = &addr->srose_call; - - user = ax25_findbyuid(current_euid()); - if (user) { - rose->source_call = user->call; - ax25_uid_put(user); - } else { - if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { - dev_put(dev); - err = -EACCES; - goto out_release; - } - rose->source_call = *source; - } - - rose->source_addr = addr->srose_addr; - rose->device = dev; - netdev_tracker_alloc(rose->device, &rose->dev_tracker, GFP_KERNEL); - rose->source_ndigis = addr->srose_ndigis; - - if (addr_len == sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr; - for (n = 0 ; n < addr->srose_ndigis ; n++) - rose->source_digis[n] = full_addr->srose_digis[n]; - } else { - if (rose->source_ndigis == 1) { - rose->source_digis[0] = addr->srose_digi; - } - } - - rose_insert_socket(sk); - - sock_reset_flag(sk, SOCK_ZAPPED); - err = 0; -out_release: - release_sock(sk); - return err; -} - -static int rose_connect(struct socket *sock, struct 
sockaddr_unsized *uaddr, int addr_len, - int flags) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; - unsigned char cause, diagnostic; - ax25_uid_assoc *user; - int n, err = 0; - - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) - return -EINVAL; - - if (addr->srose_family != AF_ROSE) - return -EINVAL; - - if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) - return -EINVAL; - - if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) - return -EINVAL; - - /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ - if ((rose->source_ndigis + addr->srose_ndigis) > ROSE_MAX_DIGIS) - return -EINVAL; - - lock_sock(sk); - - if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { - /* Connect completed during a ERESTARTSYS event */ - sock->state = SS_CONNECTED; - goto out_release; - } - - if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { - sock->state = SS_UNCONNECTED; - err = -ECONNREFUSED; - goto out_release; - } - - if (sk->sk_state == TCP_ESTABLISHED) { - /* No reconnect on a seqpacket socket */ - err = -EISCONN; - goto out_release; - } - - if (sk->sk_state == TCP_SYN_SENT) { - err = -EALREADY; - goto out_release; - } - - sk->sk_state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; - - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, - &diagnostic, 0); - if (!rose->neighbour) { - err = -ENETUNREACH; - goto out_release; - } - - rose->lci = rose_new_lci(rose->neighbour); - if (!rose->lci) { - err = -ENETUNREACH; - rose_neigh_put(rose->neighbour); - goto out_release; - } - - if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ - struct net_device *dev; - - sock_reset_flag(sk, SOCK_ZAPPED); - - dev = rose_dev_first(); - if (!dev) { - err = -ENETUNREACH; - rose_neigh_put(rose->neighbour); - goto out_release; - } - - user = 
ax25_findbyuid(current_euid()); - if (!user) { - err = -EINVAL; - rose_neigh_put(rose->neighbour); - dev_put(dev); - goto out_release; - } - - memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN); - rose->source_call = user->call; - rose->device = dev; - netdev_tracker_alloc(rose->device, &rose->dev_tracker, - GFP_KERNEL); - ax25_uid_put(user); - - rose_insert_socket(sk); /* Finish the bind */ - } - rose->dest_addr = addr->srose_addr; - rose->dest_call = addr->srose_call; - rose->rand = ((long)rose & 0xFFFF) + rose->lci; - rose->dest_ndigis = addr->srose_ndigis; - - if (addr_len == sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr; - for (n = 0 ; n < addr->srose_ndigis ; n++) - rose->dest_digis[n] = full_addr->srose_digis[n]; - } else { - if (rose->dest_ndigis == 1) { - rose->dest_digis[0] = addr->srose_digi; - } - } - - /* Move to connecting socket, start sending Connect Requests */ - sock->state = SS_CONNECTING; - sk->sk_state = TCP_SYN_SENT; - - rose->state = ROSE_STATE_1; - - rose_write_internal(sk, ROSE_CALL_REQUEST); - rose_start_heartbeat(sk); - rose_start_t1timer(sk); - - /* Now the loop */ - if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { - err = -EINPROGRESS; - goto out_release; - } - - /* - * A Connect Ack with Choke or timeout or failed routing will go to - * closed. 
- */ - if (sk->sk_state == TCP_SYN_SENT) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - if (sk->sk_state != TCP_SYN_SENT) - break; - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - - if (err) - goto out_release; - } - - if (sk->sk_state != TCP_ESTABLISHED) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); /* Always set at this point */ - goto out_release; - } - - sock->state = SS_CONNECTED; - -out_release: - release_sock(sk); - - return err; -} - -static int rose_accept(struct socket *sock, struct socket *newsock, - struct proto_accept_arg *arg) -{ - struct sk_buff *skb; - struct sock *newsk; - DEFINE_WAIT(wait); - struct sock *sk; - int err = 0; - - if ((sk = sock->sk) == NULL) - return -EINVAL; - - lock_sock(sk); - if (sk->sk_type != SOCK_SEQPACKET) { - err = -EOPNOTSUPP; - goto out_release; - } - - if (sk->sk_state != TCP_LISTEN) { - err = -EINVAL; - goto out_release; - } - - /* - * The write queue this time is holding sockets ready to use - * hooked into the SABM we saved - */ - for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb) - break; - - if (arg->flags & O_NONBLOCK) { - err = -EWOULDBLOCK; - break; - } - if (!signal_pending(current)) { - release_sock(sk); - schedule(); - lock_sock(sk); - continue; - } - err = -ERESTARTSYS; - break; - } - finish_wait(sk_sleep(sk), &wait); - if (err) - goto out_release; - - newsk = skb->sk; - sock_graft(newsk, newsock); - - /* Now attach up the new socket */ - skb->sk = NULL; - kfree_skb(skb); - sk_acceptq_removed(sk); - -out_release: - release_sock(sk); - - return err; -} - -static int rose_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr; - struct sock *sk = sock->sk; - 
struct rose_sock *rose = rose_sk(sk); - int n; - - memset(srose, 0, sizeof(*srose)); - if (peer != 0) { - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; - srose->srose_family = AF_ROSE; - srose->srose_addr = rose->dest_addr; - srose->srose_call = rose->dest_call; - srose->srose_ndigis = rose->dest_ndigis; - for (n = 0; n < rose->dest_ndigis; n++) - srose->srose_digis[n] = rose->dest_digis[n]; - } else { - srose->srose_family = AF_ROSE; - srose->srose_addr = rose->source_addr; - srose->srose_call = rose->source_call; - srose->srose_ndigis = rose->source_ndigis; - for (n = 0; n < rose->source_ndigis; n++) - srose->srose_digis[n] = rose->source_digis[n]; - } - - return sizeof(struct full_sockaddr_rose); -} - -int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci) -{ - struct sock *sk; - struct sock *make; - struct rose_sock *make_rose; - struct rose_facilities_struct facilities; - int n; - - skb->sk = NULL; /* Initially we don't know who it's for */ - - /* - * skb->data points to the rose frame start - */ - memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - - if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, - skb->len - ROSE_CALL_REQ_FACILITIES_OFF, - &facilities)) { - rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); - return 0; - } - - sk = rose_find_listener(&facilities.source_addr, &facilities.source_call); - - /* - * We can't accept the Call Request. 
- */ - if (sk == NULL || sk_acceptq_is_full(sk) || - (make = rose_make_new(sk)) == NULL) { - rose_transmit_clear_request(neigh, lci, ROSE_NETWORK_CONGESTION, 120); - return 0; - } - - skb->sk = make; - make->sk_state = TCP_ESTABLISHED; - make_rose = rose_sk(make); - - make_rose->lci = lci; - make_rose->dest_addr = facilities.dest_addr; - make_rose->dest_call = facilities.dest_call; - make_rose->dest_ndigis = facilities.dest_ndigis; - for (n = 0 ; n < facilities.dest_ndigis ; n++) - make_rose->dest_digis[n] = facilities.dest_digis[n]; - make_rose->source_addr = facilities.source_addr; - make_rose->source_call = facilities.source_call; - make_rose->source_ndigis = facilities.source_ndigis; - for (n = 0 ; n < facilities.source_ndigis ; n++) - make_rose->source_digis[n] = facilities.source_digis[n]; - make_rose->neighbour = neigh; - make_rose->device = dev; - /* Caller got a reference for us. */ - netdev_tracker_alloc(make_rose->device, &make_rose->dev_tracker, - GFP_ATOMIC); - make_rose->facilities = facilities; - - rose_neigh_hold(make_rose->neighbour); - - if (rose_sk(sk)->defer) { - make_rose->state = ROSE_STATE_5; - } else { - rose_write_internal(make, ROSE_CALL_ACCEPTED); - make_rose->state = ROSE_STATE_3; - rose_start_idletimer(make); - } - - make_rose->condition = 0x00; - make_rose->vs = 0; - make_rose->va = 0; - make_rose->vr = 0; - make_rose->vl = 0; - sk_acceptq_added(sk); - - rose_insert_socket(make); - - skb_queue_head(&sk->sk_receive_queue, skb); - - rose_start_heartbeat(make); - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); - - return 1; -} - -static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_rose *, usrose, msg->msg_name); - int err; - struct full_sockaddr_rose srose; - struct sk_buff *skb; - unsigned char *asmptr; - int n, size, qbit = 0; - - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) - 
return -EINVAL; - - if (sock_flag(sk, SOCK_ZAPPED)) - return -EADDRNOTAVAIL; - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - return -EPIPE; - } - - if (rose->neighbour == NULL || rose->device == NULL) - return -ENETUNREACH; - - if (usrose != NULL) { - if (msg->msg_namelen != sizeof(struct sockaddr_rose) && msg->msg_namelen != sizeof(struct full_sockaddr_rose)) - return -EINVAL; - memset(&srose, 0, sizeof(struct full_sockaddr_rose)); - memcpy(&srose, usrose, msg->msg_namelen); - if (rosecmp(&rose->dest_addr, &srose.srose_addr) != 0 || - ax25cmp(&rose->dest_call, &srose.srose_call) != 0) - return -EISCONN; - if (srose.srose_ndigis != rose->dest_ndigis) - return -EISCONN; - if (srose.srose_ndigis == rose->dest_ndigis) { - for (n = 0 ; n < srose.srose_ndigis ; n++) - if (ax25cmp(&rose->dest_digis[n], - &srose.srose_digis[n])) - return -EISCONN; - } - if (srose.srose_family != AF_ROSE) - return -EINVAL; - } else { - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; - - srose.srose_family = AF_ROSE; - srose.srose_addr = rose->dest_addr; - srose.srose_call = rose->dest_call; - srose.srose_ndigis = rose->dest_ndigis; - for (n = 0 ; n < rose->dest_ndigis ; n++) - srose.srose_digis[n] = rose->dest_digis[n]; - } - - /* Build a packet */ - /* Sanity check the packet size */ - if (len > 65535) - return -EMSGSIZE; - - size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; - - if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) - return err; - - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN); - - /* - * Put the data on the end - */ - - skb_reset_transport_header(skb); - skb_put(skb, len); - - err = memcpy_from_msg(skb_transport_header(skb), msg, len); - if (err) { - kfree_skb(skb); - return err; - } - - /* - * If the Q BIT Include socket option is in force, the first - * byte of the user data is the logical value of the Q Bit. 
- */ - if (rose->qbitincl) { - qbit = skb->data[0]; - skb_pull(skb, 1); - } - - /* - * Push down the ROSE header - */ - asmptr = skb_push(skb, ROSE_MIN_LEN); - - /* Build a ROSE Network header */ - asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI; - asmptr[1] = (rose->lci >> 0) & 0xFF; - asmptr[2] = ROSE_DATA; - - if (qbit) - asmptr[0] |= ROSE_Q_BIT; - - if (sk->sk_state != TCP_ESTABLISHED) { - kfree_skb(skb); - return -ENOTCONN; - } - -#ifdef M_BIT -#define ROSE_PACLEN (256-ROSE_MIN_LEN) - if (skb->len - ROSE_MIN_LEN > ROSE_PACLEN) { - unsigned char header[ROSE_MIN_LEN]; - struct sk_buff *skbn; - int frontlen; - int lg; - - /* Save a copy of the Header */ - skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN); - skb_pull(skb, ROSE_MIN_LEN); - - frontlen = skb_headroom(skb); - - while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_PACLEN, 0, &err)) == NULL) { - kfree_skb(skb); - return err; - } - - skbn->sk = sk; - skbn->free = 1; - skbn->arp = 1; - - skb_reserve(skbn, frontlen); - - lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN; - - /* Copy the user data */ - skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg); - skb_pull(skb, lg); - - /* Duplicate the Header */ - skb_push(skbn, ROSE_MIN_LEN); - skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN); - - if (skb->len > 0) - skbn->data[2] |= M_BIT; - - skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */ - } - - skb->free = 1; - kfree_skb(skb); - } else { - skb_queue_tail(&sk->sk_write_queue, skb); /* Throw it on the queue */ - } -#else - skb_queue_tail(&sk->sk_write_queue, skb); /* Shove it onto the queue */ -#endif - - rose_kick(sk); - - return len; -} - - -static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - size_t copied; - unsigned char *asmptr; - struct sk_buff *skb; - int n, er, qbit; - - /* - * This works for seqpacket too. 
The receiver has ordered the queue for - * us! We do one quick check first though - */ - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; - - /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags, &er); - if (!skb) - return er; - - qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT; - - skb_pull(skb, ROSE_MIN_LEN); - - if (rose->qbitincl) { - asmptr = skb_push(skb, 1); - *asmptr = qbit; - } - - skb_reset_transport_header(skb); - copied = skb->len; - - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - - skb_copy_datagram_msg(skb, 0, msg, copied); - - if (msg->msg_name) { - struct sockaddr_rose *srose; - DECLARE_SOCKADDR(struct full_sockaddr_rose *, full_srose, - msg->msg_name); - - memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); - srose = msg->msg_name; - srose->srose_family = AF_ROSE; - srose->srose_addr = rose->dest_addr; - srose->srose_call = rose->dest_call; - srose->srose_ndigis = rose->dest_ndigis; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } - - skb_free_datagram(sk, skb); - - return copied; -} - - -static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - struct rose_sock *rose = rose_sk(sk); - void __user *argp = (void __user *)arg; - - switch (cmd) { - case TIOCOUTQ: { - long amount; - - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - return put_user(amount, (unsigned int __user *) argp); - } - - case TIOCINQ: { - struct sk_buff *skb; - long amount = 0L; - - spin_lock_irq(&sk->sk_receive_queue.lock); - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - spin_unlock_irq(&sk->sk_receive_queue.lock); - return put_user(amount, (unsigned int __user *) argp); - } - - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case 
SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - return -EINVAL; - - case SIOCADDRT: - case SIOCDELRT: - case SIOCRSCLRRT: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - return rose_rt_ioctl(cmd, argp); - - case SIOCRSGCAUSE: { - struct rose_cause_struct rose_cause; - rose_cause.cause = rose->cause; - rose_cause.diagnostic = rose->diagnostic; - return copy_to_user(argp, &rose_cause, sizeof(struct rose_cause_struct)) ? -EFAULT : 0; - } - - case SIOCRSSCAUSE: { - struct rose_cause_struct rose_cause; - if (copy_from_user(&rose_cause, argp, sizeof(struct rose_cause_struct))) - return -EFAULT; - rose->cause = rose_cause.cause; - rose->diagnostic = rose_cause.diagnostic; - return 0; - } - - case SIOCRSSL2CALL: - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) - ax25_listen_release(&rose_callsign, NULL); - if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address))) - return -EFAULT; - if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) - return ax25_listen_register(&rose_callsign, NULL); - - return 0; - - case SIOCRSGL2CALL: - return copy_to_user(argp, &rose_callsign, sizeof(ax25_address)) ? 
-EFAULT : 0; - - case SIOCRSACCEPT: - if (rose->state == ROSE_STATE_5) { - rose_write_internal(sk, ROSE_CALL_ACCEPTED); - rose_start_idletimer(sk); - rose->condition = 0x00; - rose->vs = 0; - rose->va = 0; - rose->vr = 0; - rose->vl = 0; - rose->state = ROSE_STATE_3; - } - return 0; - - default: - return -ENOIOCTLCMD; - } - - return 0; -} - -#ifdef CONFIG_PROC_FS -static void *rose_info_start(struct seq_file *seq, loff_t *pos) - __acquires(rose_list_lock) -{ - spin_lock_bh(&rose_list_lock); - return seq_hlist_start_head(&rose_list, *pos); -} - -static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_hlist_next(v, &rose_list, pos); -} - -static void rose_info_stop(struct seq_file *seq, void *v) - __releases(rose_list_lock) -{ - spin_unlock_bh(&rose_list_lock); -} - -static int rose_info_show(struct seq_file *seq, void *v) -{ - char buf[11], rsbuf[11]; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, - "dest_addr dest_call src_addr src_call dev lci neigh st vs vr va t t1 t2 t3 hb idle Snd-Q Rcv-Q inode\n"); - - else { - struct sock *s = sk_entry(v); - struct rose_sock *rose = rose_sk(s); - const char *devname, *callsign; - const struct net_device *dev = rose->device; - - if (!dev) - devname = "???"; - else - devname = dev->name; - - seq_printf(seq, "%-10s %-9s ", - rose2asc(rsbuf, &rose->dest_addr), - ax2asc(buf, &rose->dest_call)); - - if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) - callsign = "??????-?"; - else - callsign = ax2asc(buf, &rose->source_call); - - seq_printf(seq, - "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %llu\n", - rose2asc(rsbuf, &rose->source_addr), - callsign, - devname, - rose->lci & 0x0FFF, - (rose->neighbour) ? 
rose->neighbour->number : 0, - rose->state, - rose->vs, - rose->vr, - rose->va, - ax25_display_timer(&rose->timer) / HZ, - rose->t1 / HZ, - rose->t2 / HZ, - rose->t3 / HZ, - rose->hb / HZ, - ax25_display_timer(&rose->idletimer) / (60 * HZ), - rose->idle / (60 * HZ), - sk_wmem_alloc_get(s), - sk_rmem_alloc_get(s), - s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : (u64)0); - } - - return 0; -} - -static const struct seq_operations rose_info_seqops = { - .start = rose_info_start, - .next = rose_info_next, - .stop = rose_info_stop, - .show = rose_info_show, -}; -#endif /* CONFIG_PROC_FS */ - -static const struct net_proto_family rose_family_ops = { - .family = PF_ROSE, - .create = rose_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops rose_proto_ops = { - .family = PF_ROSE, - .owner = THIS_MODULE, - .release = rose_release, - .bind = rose_bind, - .connect = rose_connect, - .socketpair = sock_no_socketpair, - .accept = rose_accept, - .getname = rose_getname, - .poll = datagram_poll, - .ioctl = rose_ioctl, - .gettstamp = sock_gettstamp, - .listen = rose_listen, - .shutdown = sock_no_shutdown, - .setsockopt = rose_setsockopt, - .getsockopt = rose_getsockopt, - .sendmsg = rose_sendmsg, - .recvmsg = rose_recvmsg, - .mmap = sock_no_mmap, -}; - -static struct notifier_block rose_dev_notifier = { - .notifier_call = rose_device_event, -}; - -static struct net_device **dev_rose; - -static struct ax25_protocol rose_pid = { - .pid = AX25_P_ROSE, - .func = rose_route_frame -}; - -static struct ax25_linkfail rose_linkfail_notifier = { - .func = rose_link_failed -}; - -static int __init rose_proto_init(void) -{ - int i; - int rc; - - if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { - printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter too large\n"); - rc = -EINVAL; - goto out; - } - - rc = proto_register(&rose_proto, 0); - if (rc != 0) - goto out; - - rose_callsign = null_ax25_address; - - dev_rose = kzalloc_objs(struct net_device *, 
rose_ndevs); - if (dev_rose == NULL) { - printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); - rc = -ENOMEM; - goto out_proto_unregister; - } - - for (i = 0; i < rose_ndevs; i++) { - struct net_device *dev; - char name[IFNAMSIZ]; - - sprintf(name, "rose%d", i); - dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup); - if (!dev) { - printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); - rc = -ENOMEM; - goto fail; - } - rc = register_netdev(dev); - if (rc) { - printk(KERN_ERR "ROSE: netdevice registration failed\n"); - free_netdev(dev); - goto fail; - } - rose_set_lockdep_key(dev); - dev_rose[i] = dev; - } - - sock_register(&rose_family_ops); - register_netdevice_notifier(&rose_dev_notifier); - - ax25_register_pid(&rose_pid); - ax25_linkfail_register(&rose_linkfail_notifier); - -#ifdef CONFIG_SYSCTL - rose_register_sysctl(); -#endif - rose_loopback_init(); - - rose_add_loopback_neigh(); - - proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops); - proc_create_seq("rose_neigh", 0444, init_net.proc_net, - &rose_neigh_seqops); - proc_create_seq("rose_nodes", 0444, init_net.proc_net, - &rose_node_seqops); - proc_create_seq("rose_routes", 0444, init_net.proc_net, - &rose_route_seqops); -out: - return rc; -fail: - while (--i >= 0) { - unregister_netdev(dev_rose[i]); - free_netdev(dev_rose[i]); - } - kfree(dev_rose); -out_proto_unregister: - proto_unregister(&rose_proto); - goto out; -} -module_init(rose_proto_init); - -module_param(rose_ndevs, int, 0); -MODULE_PARM_DESC(rose_ndevs, "number of ROSE devices"); - -MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>"); -MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_ROSE); - -static void __exit rose_exit(void) -{ - int i; - - remove_proc_entry("rose", init_net.proc_net); - remove_proc_entry("rose_neigh", init_net.proc_net); - remove_proc_entry("rose_nodes", init_net.proc_net); - 
remove_proc_entry("rose_routes", init_net.proc_net); - rose_loopback_clear(); - - rose_rt_free(); - - ax25_protocol_release(AX25_P_ROSE); - ax25_linkfail_release(&rose_linkfail_notifier); - - if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) - ax25_listen_release(&rose_callsign, NULL); - -#ifdef CONFIG_SYSCTL - rose_unregister_sysctl(); -#endif - unregister_netdevice_notifier(&rose_dev_notifier); - - sock_unregister(PF_ROSE); - - for (i = 0; i < rose_ndevs; i++) { - struct net_device *dev = dev_rose[i]; - - if (dev) { - unregister_netdev(dev); - free_netdev(dev); - } - } - - kfree(dev_rose); - proto_unregister(&rose_proto); -} - -module_exit(rose_exit); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c deleted file mode 100644 index f1a76a5820f1..000000000000 --- a/net/rose/rose_dev.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/sysctl.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/in.h> -#include <linux/if_ether.h> -#include <linux/slab.h> - -#include <asm/io.h> - -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> - -#include <net/ip.h> -#include <net/arp.h> - -#include <net/ax25.h> -#include <net/rose.h> - -static int rose_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned int len) -{ - unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2); - - if (daddr) - memcpy(buff + 7, daddr, dev->addr_len); - - *buff++ = ROSE_GFI | ROSE_Q_BIT; - *buff++ = 0x00; - *buff++ = ROSE_DATA; - *buff++ = 0x7F; - *buff++ = AX25_P_IP; - - if (daddr != 
NULL) - return 37; - - return -37; -} - -static int rose_set_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sa = addr; - int err; - - if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) - return 0; - - if (dev->flags & IFF_UP) { - err = rose_add_loopback_node((rose_address *)sa->sa_data); - if (err) - return err; - - rose_del_loopback_node((const rose_address *)dev->dev_addr); - } - - dev_addr_set(dev, sa->sa_data); - - return 0; -} - -static int rose_open(struct net_device *dev) -{ - int err; - - err = rose_add_loopback_node((const rose_address *)dev->dev_addr); - if (err) - return err; - - netif_start_queue(dev); - - return 0; -} - -static int rose_close(struct net_device *dev) -{ - netif_stop_queue(dev); - rose_del_loopback_node((const rose_address *)dev->dev_addr); - return 0; -} - -static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct net_device_stats *stats = &dev->stats; - unsigned int len = skb->len; - - if (!netif_running(dev)) { - printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); - return NETDEV_TX_BUSY; - } - - if (!rose_route_frame(skb, NULL)) { - dev_kfree_skb(skb); - stats->tx_errors++; - return NETDEV_TX_OK; - } - - stats->tx_packets++; - stats->tx_bytes += len; - return NETDEV_TX_OK; -} - -static const struct header_ops rose_header_ops = { - .create = rose_header, -}; - -static const struct net_device_ops rose_netdev_ops = { - .ndo_open = rose_open, - .ndo_stop = rose_close, - .ndo_start_xmit = rose_xmit, - .ndo_set_mac_address = rose_set_mac_address, -}; - -void rose_setup(struct net_device *dev) -{ - dev->mtu = ROSE_MAX_PACKET_SIZE - 2; - dev->netdev_ops = &rose_netdev_ops; - - dev->header_ops = &rose_header_ops; - dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; - dev->addr_len = ROSE_ADDR_LEN; - dev->type = ARPHRD_ROSE; - - /* New-style flags. 
*/ - dev->flags = IFF_NOARP; -} diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c deleted file mode 100644 index ca4f217ef3d3..000000000000 --- a/net/rose/rose_in.c +++ /dev/null @@ -1,301 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * - * Most of this code is based on the SDL diagrams published in the 7th ARRL - * Computer Networking Conference papers. The diagrams have mistakes in them, - * but are mostly correct. Before you modify the code could you read the SDL - * diagrams as the code is not obvious and probably very easy to break. - */ -#include <linux/errno.h> -#include <linux/filter.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/rose.h> - -/* - * State machine for state 1, Awaiting Call Accepted State. - * The handling of the timer(s) is in file rose_timer.c. - * Handling of state 0 and connection release is in af_rose.c. 
- */ -static int rose_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype) -{ - struct rose_sock *rose = rose_sk(sk); - - switch (frametype) { - case ROSE_CALL_ACCEPTED: - rose_stop_timer(sk); - rose_start_idletimer(sk); - rose->condition = 0x00; - rose->vs = 0; - rose->va = 0; - rose->vr = 0; - rose->vl = 0; - rose->state = ROSE_STATE_3; - sk->sk_state = TCP_ESTABLISHED; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - break; - - case ROSE_CLEAR_REQUEST: - rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); - rose_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); - rose_neigh_put(rose->neighbour); - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 2, Awaiting Clear Confirmation State. - * The handling of the timer(s) is in file rose_timer.c - * Handling of state 0 and connection release is in af_rose.c. - */ -static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype) -{ - struct rose_sock *rose = rose_sk(sk); - - switch (frametype) { - case ROSE_CLEAR_REQUEST: - rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); - rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_neigh_put(rose->neighbour); - break; - - case ROSE_CLEAR_CONFIRMATION: - rose_disconnect(sk, 0, -1, -1); - rose_neigh_put(rose->neighbour); - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 3, Connected State. - * The handling of the timer(s) is in file rose_timer.c - * Handling of state 0 and connection release is in af_rose.c. 
- */ -static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m) -{ - struct rose_sock *rose = rose_sk(sk); - int queued = 0; - - switch (frametype) { - case ROSE_RESET_REQUEST: - rose_stop_timer(sk); - rose_start_idletimer(sk); - rose_write_internal(sk, ROSE_RESET_CONFIRMATION); - rose->condition = 0x00; - rose->vs = 0; - rose->vr = 0; - rose->va = 0; - rose->vl = 0; - rose_requeue_frames(sk); - break; - - case ROSE_CLEAR_REQUEST: - rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); - rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_neigh_put(rose->neighbour); - break; - - case ROSE_RR: - case ROSE_RNR: - if (!rose_validate_nr(sk, nr)) { - rose_write_internal(sk, ROSE_RESET_REQUEST); - rose->condition = 0x00; - rose->vs = 0; - rose->vr = 0; - rose->va = 0; - rose->vl = 0; - rose->state = ROSE_STATE_4; - rose_start_t2timer(sk); - rose_stop_idletimer(sk); - } else { - rose_frames_acked(sk, nr); - if (frametype == ROSE_RNR) { - rose->condition |= ROSE_COND_PEER_RX_BUSY; - } else { - rose->condition &= ~ROSE_COND_PEER_RX_BUSY; - } - } - break; - - case ROSE_DATA: /* XXX */ - rose->condition &= ~ROSE_COND_PEER_RX_BUSY; - if (!rose_validate_nr(sk, nr)) { - rose_write_internal(sk, ROSE_RESET_REQUEST); - rose->condition = 0x00; - rose->vs = 0; - rose->vr = 0; - rose->va = 0; - rose->vl = 0; - rose->state = ROSE_STATE_4; - rose_start_t2timer(sk); - rose_stop_idletimer(sk); - break; - } - rose_frames_acked(sk, nr); - if (ns == rose->vr) { - rose_start_idletimer(sk); - if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) && - __sock_queue_rcv_skb(sk, skb) == 0) { - rose->vr = (rose->vr + 1) % ROSE_MODULUS; - queued = 1; - } else { - /* Should never happen ! 
*/ - rose_write_internal(sk, ROSE_RESET_REQUEST); - rose->condition = 0x00; - rose->vs = 0; - rose->vr = 0; - rose->va = 0; - rose->vl = 0; - rose->state = ROSE_STATE_4; - rose_start_t2timer(sk); - rose_stop_idletimer(sk); - break; - } - if (atomic_read(&sk->sk_rmem_alloc) > - (sk->sk_rcvbuf >> 1)) - rose->condition |= ROSE_COND_OWN_RX_BUSY; - } - /* - * If the window is full, ack the frame, else start the - * acknowledge hold back timer. - */ - if (((rose->vl + sysctl_rose_window_size) % ROSE_MODULUS) == rose->vr) { - rose->condition &= ~ROSE_COND_ACK_PENDING; - rose_stop_timer(sk); - rose_enquiry_response(sk); - } else { - rose->condition |= ROSE_COND_ACK_PENDING; - rose_start_hbtimer(sk); - } - break; - - default: - printk(KERN_WARNING "ROSE: unknown %02X in state 3\n", frametype); - break; - } - - return queued; -} - -/* - * State machine for state 4, Awaiting Reset Confirmation State. - * The handling of the timer(s) is in file rose_timer.c - * Handling of state 0 and connection release is in af_rose.c. - */ -static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype) -{ - struct rose_sock *rose = rose_sk(sk); - - switch (frametype) { - case ROSE_RESET_REQUEST: - rose_write_internal(sk, ROSE_RESET_CONFIRMATION); - fallthrough; - case ROSE_RESET_CONFIRMATION: - rose_stop_timer(sk); - rose_start_idletimer(sk); - rose->condition = 0x00; - rose->va = 0; - rose->vr = 0; - rose->vs = 0; - rose->vl = 0; - rose->state = ROSE_STATE_3; - rose_requeue_frames(sk); - break; - - case ROSE_CLEAR_REQUEST: - rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); - rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_neigh_put(rose->neighbour); - break; - - default: - break; - } - - return 0; -} - -/* - * State machine for state 5, Awaiting Call Acceptance State. - * The handling of the timer(s) is in file rose_timer.c - * Handling of state 0 and connection release is in af_rose.c. 
- */ -static int rose_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype) -{ - if (frametype == ROSE_CLEAR_REQUEST) { - rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); - rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_neigh_put(rose_sk(sk)->neighbour); - } - - return 0; -} - -/* Higher level upcall for a LAPB frame */ -int rose_process_rx_frame(struct sock *sk, struct sk_buff *skb) -{ - struct rose_sock *rose = rose_sk(sk); - int queued = 0, frametype, ns, nr, q, d, m; - - if (rose->state == ROSE_STATE_0) - return 0; - - frametype = rose_decode(skb, &ns, &nr, &q, &d, &m); - - /* - * ROSE_CLEAR_REQUEST carries cause and diagnostic in bytes 3..4. - * Reject a malformed frame that is too short to contain them. - */ - if (frametype == ROSE_CLEAR_REQUEST && skb->len < 5) - return 0; - - switch (rose->state) { - case ROSE_STATE_1: - queued = rose_state1_machine(sk, skb, frametype); - break; - case ROSE_STATE_2: - queued = rose_state2_machine(sk, skb, frametype); - break; - case ROSE_STATE_3: - queued = rose_state3_machine(sk, skb, frametype, ns, nr, q, d, m); - break; - case ROSE_STATE_4: - queued = rose_state4_machine(sk, skb, frametype); - break; - case ROSE_STATE_5: - queued = rose_state5_machine(sk, skb, frametype); - break; - } - - rose_kick(sk); - - return queued; -} diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c deleted file mode 100644 index 7746229fdc8c..000000000000 --- a/net/rose/rose_link.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include 
<linux/skbuff.h> -#include <net/sock.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/rose.h> - -static void rose_ftimer_expiry(struct timer_list *); -static void rose_t0timer_expiry(struct timer_list *); - -static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); -static void rose_transmit_restart_request(struct rose_neigh *neigh); - -void rose_start_ftimer(struct rose_neigh *neigh) -{ - timer_delete(&neigh->ftimer); - - neigh->ftimer.function = rose_ftimer_expiry; - neigh->ftimer.expires = - jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); - - add_timer(&neigh->ftimer); -} - -static void rose_start_t0timer(struct rose_neigh *neigh) -{ - timer_delete(&neigh->t0timer); - - neigh->t0timer.function = rose_t0timer_expiry; - neigh->t0timer.expires = - jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); - - add_timer(&neigh->t0timer); -} - -void rose_stop_ftimer(struct rose_neigh *neigh) -{ - timer_delete(&neigh->ftimer); -} - -void rose_stop_t0timer(struct rose_neigh *neigh) -{ - timer_delete(&neigh->t0timer); -} - -int rose_ftimer_running(struct rose_neigh *neigh) -{ - return timer_pending(&neigh->ftimer); -} - -static int rose_t0timer_running(struct rose_neigh *neigh) -{ - return timer_pending(&neigh->t0timer); -} - -static void rose_ftimer_expiry(struct timer_list *t) -{ -} - -static void rose_t0timer_expiry(struct timer_list *t) -{ - struct rose_neigh *neigh = timer_container_of(neigh, t, t0timer); - - rose_transmit_restart_request(neigh); - - neigh->dce_mode = 0; - - rose_start_t0timer(neigh); -} - -/* - * Interface to ax25_send_frame. Changes my level 2 callsign depending - * on whether we have a global ROSE callsign or use the default port - * callsign. 
- */ -static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) -{ - const ax25_address *rose_call; - ax25_cb *ax25s; - - if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (const ax25_address *)neigh->dev->dev_addr; - else - rose_call = &rose_callsign; - - ax25s = neigh->ax25; - neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); - if (ax25s) - ax25_cb_put(ax25s); - - return neigh->ax25 != NULL; -} - -/* - * Interface to ax25_link_up. Changes my level 2 callsign depending - * on whether we have a global ROSE callsign or use the default port - * callsign. - */ -static int rose_link_up(struct rose_neigh *neigh) -{ - const ax25_address *rose_call; - ax25_cb *ax25s; - - if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (const ax25_address *)neigh->dev->dev_addr; - else - rose_call = &rose_callsign; - - ax25s = neigh->ax25; - neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); - if (ax25s) - ax25_cb_put(ax25s); - - return neigh->ax25 != NULL; -} - -/* - * This handles all restart and diagnostic frames. 
- */ -void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigned short frametype) -{ - struct sk_buff *skbn; - - switch (frametype) { - case ROSE_RESTART_REQUEST: - rose_stop_t0timer(neigh); - neigh->restarted = 1; - neigh->dce_mode = (skb->data[3] == ROSE_DTE_ORIGINATED); - rose_transmit_restart_confirmation(neigh); - break; - - case ROSE_RESTART_CONFIRMATION: - rose_stop_t0timer(neigh); - neigh->restarted = 1; - break; - - case ROSE_DIAGNOSTIC: - pr_warn("ROSE: received diagnostic #%d - %3ph\n", skb->data[3], - skb->data + 4); - break; - - default: - printk(KERN_WARNING "ROSE: received unknown %02X with LCI 000\n", frametype); - break; - } - - if (neigh->restarted) { - while ((skbn = skb_dequeue(&neigh->queue)) != NULL) - if (!rose_send_frame(skbn, neigh)) - kfree_skb(skbn); - } -} - -/* - * This routine is called when a Restart Request is needed - */ -static void rose_transmit_restart_request(struct rose_neigh *neigh) -{ - struct sk_buff *skb; - unsigned char *dptr; - int len; - - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; - - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN); - - dptr = skb_put(skb, ROSE_MIN_LEN + 3); - - *dptr++ = AX25_P_ROSE; - *dptr++ = ROSE_GFI; - *dptr++ = 0x00; - *dptr++ = ROSE_RESTART_REQUEST; - *dptr++ = ROSE_DTE_ORIGINATED; - *dptr++ = 0; - - if (!rose_send_frame(skb, neigh)) - kfree_skb(skb); -} - -/* - * This routine is called when a Restart Confirmation is needed - */ -static void rose_transmit_restart_confirmation(struct rose_neigh *neigh) -{ - struct sk_buff *skb; - unsigned char *dptr; - int len; - - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; - - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN); - - dptr = skb_put(skb, ROSE_MIN_LEN + 1); - - *dptr++ = AX25_P_ROSE; - *dptr++ = ROSE_GFI; - *dptr++ = 0x00; - *dptr++ = 
ROSE_RESTART_CONFIRMATION; - - if (!rose_send_frame(skb, neigh)) - kfree_skb(skb); -} - -/* - * This routine is called when a Clear Request is needed outside of the context - * of a connected socket. - */ -void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, unsigned char cause, unsigned char diagnostic) -{ - struct sk_buff *skb; - unsigned char *dptr; - int len; - - if (!neigh->dev) - return; - - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; - - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN); - - dptr = skb_put(skb, ROSE_MIN_LEN + 3); - - *dptr++ = AX25_P_ROSE; - *dptr++ = ((lci >> 8) & 0x0F) | ROSE_GFI; - *dptr++ = ((lci >> 0) & 0xFF); - *dptr++ = ROSE_CLEAR_REQUEST; - *dptr++ = cause; - *dptr++ = diagnostic; - - if (!rose_send_frame(skb, neigh)) - kfree_skb(skb); -} - -void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh) -{ - unsigned char *dptr; - - if (neigh->loopback) { - rose_loopback_queue(skb, neigh); - return; - } - - if (!rose_link_up(neigh)) - neigh->restarted = 0; - - dptr = skb_push(skb, 1); - *dptr++ = AX25_P_ROSE; - - if (neigh->restarted) { - if (!rose_send_frame(skb, neigh)) - kfree_skb(skb); - } else { - skb_queue_tail(&neigh->queue, skb); - - if (!rose_t0timer_running(neigh)) { - rose_transmit_restart_request(neigh); - neigh->dce_mode = 0; - rose_start_t0timer(neigh); - } - } -} diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c deleted file mode 100644 index b538e39b3df5..000000000000 --- a/net/rose/rose_loopback.c +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/socket.h> -#include <linux/timer.h> -#include <net/ax25.h> -#include <linux/skbuff.h> -#include <net/rose.h> -#include <linux/init.h> - -static struct 
sk_buff_head loopback_queue; -#define ROSE_LOOPBACK_LIMIT 1000 -static struct timer_list loopback_timer; - -static void rose_set_loopback_timer(void); -static void rose_loopback_timer(struct timer_list *unused); - -void rose_loopback_init(void) -{ - skb_queue_head_init(&loopback_queue); - - timer_setup(&loopback_timer, rose_loopback_timer, 0); -} - -static int rose_loopback_running(void) -{ - return timer_pending(&loopback_timer); -} - -int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) -{ - struct sk_buff *skbn = NULL; - - if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) - skbn = skb_clone(skb, GFP_ATOMIC); - - if (skbn) { - consume_skb(skb); - skb_queue_tail(&loopback_queue, skbn); - - if (!rose_loopback_running()) - rose_set_loopback_timer(); - } else { - kfree_skb(skb); - } - - return 1; -} - -static void rose_set_loopback_timer(void) -{ - mod_timer(&loopback_timer, jiffies + 10); -} - -static void rose_loopback_timer(struct timer_list *unused) -{ - struct sk_buff *skb; - struct net_device *dev; - rose_address *dest; - struct sock *sk; - unsigned short frametype; - unsigned int lci_i, lci_o; - int count; - - for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { - skb = skb_dequeue(&loopback_queue); - if (!skb) - return; - if (skb->len < ROSE_MIN_LEN) { - kfree_skb(skb); - continue; - } - lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); - frametype = skb->data[2]; - if (frametype == ROSE_CALL_REQUEST && - (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || - skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != - ROSE_CALL_REQ_ADDR_LEN_VAL)) { - kfree_skb(skb); - continue; - } - dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); - lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i; - - skb_reset_transport_header(skb); - - sk = rose_find_socket(lci_o, rose_loopback_neigh); - if (sk) { - if (rose_process_rx_frame(sk, skb) == 0) - kfree_skb(skb); - continue; - } - - if (frametype == ROSE_CALL_REQUEST) { - if 
(!rose_loopback_neigh->dev && - !rose_loopback_neigh->loopback) { - kfree_skb(skb); - continue; - } - - dev = rose_dev_get(dest); - if (!dev) { - kfree_skb(skb); - continue; - } - - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) { - dev_put(dev); - kfree_skb(skb); - } - } else { - kfree_skb(skb); - } - } - if (!skb_queue_empty(&loopback_queue)) - mod_timer(&loopback_timer, jiffies + 1); -} - -void __exit rose_loopback_clear(void) -{ - struct sk_buff *skb; - - timer_delete(&loopback_timer); - - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { - skb->sk = NULL; - kfree_skb(skb); - } -} diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c deleted file mode 100644 index 9050e33c9604..000000000000 --- a/net/rose/rose_out.c +++ /dev/null @@ -1,122 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/gfp.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/rose.h> - -/* - * This procedure is passed a buffer descriptor for an iframe. It builds - * the rest of the control part of the frame and then writes it out. 
- */ -static void rose_send_iframe(struct sock *sk, struct sk_buff *skb) -{ - struct rose_sock *rose = rose_sk(sk); - - if (skb == NULL) - return; - - skb->data[2] |= (rose->vr << 5) & 0xE0; - skb->data[2] |= (rose->vs << 1) & 0x0E; - - rose_start_idletimer(sk); - - rose_transmit_link(skb, rose->neighbour); -} - -void rose_kick(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - struct sk_buff *skb, *skbn; - unsigned short start, end; - - if (rose->state != ROSE_STATE_3) - return; - - if (rose->condition & ROSE_COND_PEER_RX_BUSY) - return; - - if (!skb_peek(&sk->sk_write_queue)) - return; - - start = (skb_peek(&rose->ack_queue) == NULL) ? rose->va : rose->vs; - end = (rose->va + sysctl_rose_window_size) % ROSE_MODULUS; - - if (start == end) - return; - - rose->vs = start; - - /* - * Transmit data until either we're out of data to send or - * the window is full. - */ - - skb = skb_dequeue(&sk->sk_write_queue); - - do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - skb_queue_head(&sk->sk_write_queue, skb); - break; - } - - skb_set_owner_w(skbn, sk); - - /* - * Transmit the frame copy. - */ - rose_send_iframe(sk, skbn); - - rose->vs = (rose->vs + 1) % ROSE_MODULUS; - - /* - * Requeue the original data frame. - */ - skb_queue_tail(&rose->ack_queue, skb); - - } while (rose->vs != end && - (skb = skb_dequeue(&sk->sk_write_queue)) != NULL); - - rose->vl = rose->vr; - rose->condition &= ~ROSE_COND_ACK_PENDING; - - rose_stop_timer(sk); -} - -/* - * The following routines are taken from page 170 of the 7th ARRL Computer - * Networking Conference paper, as is the whole state machine. 
- */ - -void rose_enquiry_response(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - if (rose->condition & ROSE_COND_OWN_RX_BUSY) - rose_write_internal(sk, ROSE_RNR); - else - rose_write_internal(sk, ROSE_RR); - - rose->vl = rose->vr; - rose->condition &= ~ROSE_COND_ACK_PENDING; - - rose_stop_timer(sk); -} diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c deleted file mode 100644 index e31842e6b3c8..000000000000 --- a/net/rose/rose_route.c +++ /dev/null @@ -1,1333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <net/arp.h> -#include <linux/if_arp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <net/rose.h> -#include <linux/seq_file.h> -#include <linux/export.h> - -static unsigned int rose_neigh_no = 1; - -static struct rose_node *rose_node_list; -static DEFINE_SPINLOCK(rose_node_list_lock); -static struct rose_neigh *rose_neigh_list; -static DEFINE_SPINLOCK(rose_neigh_list_lock); -static struct rose_route *rose_route_list; -static DEFINE_SPINLOCK(rose_route_list_lock); - -struct rose_neigh *rose_loopback_neigh; - -/* - * Add a new route to a node, and in the process add the node and the - * neighbour if it is new. 
- */ -static int __must_check rose_add_node(struct rose_route_struct *rose_route, - struct net_device *dev) -{ - struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; - struct rose_neigh *rose_neigh; - int i, res = 0; - - spin_lock_bh(&rose_node_list_lock); - spin_lock_bh(&rose_neigh_list_lock); - - rose_node = rose_node_list; - while (rose_node != NULL) { - if ((rose_node->mask == rose_route->mask) && - (rosecmpm(&rose_route->address, &rose_node->address, - rose_route->mask) == 0)) - break; - rose_node = rose_node->next; - } - - if (rose_node != NULL && rose_node->loopback) { - res = -EINVAL; - goto out; - } - - rose_neigh = rose_neigh_list; - while (rose_neigh != NULL) { - if (ax25cmp(&rose_route->neighbour, - &rose_neigh->callsign) == 0 && - rose_neigh->dev == dev) - break; - rose_neigh = rose_neigh->next; - } - - if (rose_neigh == NULL) { - rose_neigh = kmalloc_obj(*rose_neigh, GFP_ATOMIC); - if (rose_neigh == NULL) { - res = -ENOMEM; - goto out; - } - - rose_neigh->callsign = rose_route->neighbour; - rose_neigh->digipeat = NULL; - rose_neigh->ax25 = NULL; - rose_neigh->dev = dev; - rose_neigh->count = 0; - rose_neigh->dce_mode = 0; - rose_neigh->loopback = 0; - rose_neigh->number = rose_neigh_no++; - rose_neigh->restarted = 0; - refcount_set(&rose_neigh->use, 1); - - skb_queue_head_init(&rose_neigh->queue); - - timer_setup(&rose_neigh->ftimer, NULL, 0); - timer_setup(&rose_neigh->t0timer, NULL, 0); - - if (rose_route->ndigis != 0) { - rose_neigh->digipeat = - kmalloc_obj(ax25_digi, GFP_ATOMIC); - if (rose_neigh->digipeat == NULL) { - kfree(rose_neigh); - res = -ENOMEM; - goto out; - } - - rose_neigh->digipeat->ndigi = rose_route->ndigis; - rose_neigh->digipeat->lastrepeat = -1; - - for (i = 0; i < rose_route->ndigis; i++) { - rose_neigh->digipeat->calls[i] = - rose_route->digipeaters[i]; - rose_neigh->digipeat->repeated[i] = 0; - } - } - - rose_neigh->next = rose_neigh_list; - rose_neigh_list = rose_neigh; - } - - /* - * This is a new node to be inserted into 
the list. Find where it needs - * to be inserted into the list, and insert it. We want to be sure - * to order the list in descending order of mask size to ensure that - * later when we are searching this list the first match will be the - * best match. - */ - if (rose_node == NULL) { - rose_tmpn = rose_node_list; - rose_tmpp = NULL; - - while (rose_tmpn != NULL) { - if (rose_tmpn->mask > rose_route->mask) { - rose_tmpp = rose_tmpn; - rose_tmpn = rose_tmpn->next; - } else { - break; - } - } - - /* create new node */ - rose_node = kmalloc_obj(*rose_node, GFP_ATOMIC); - if (rose_node == NULL) { - res = -ENOMEM; - goto out; - } - - rose_node->address = rose_route->address; - rose_node->mask = rose_route->mask; - rose_node->count = 1; - rose_node->loopback = 0; - rose_node->neighbour[0] = rose_neigh; - - if (rose_tmpn == NULL) { - if (rose_tmpp == NULL) { /* Empty list */ - rose_node_list = rose_node; - rose_node->next = NULL; - } else { - rose_tmpp->next = rose_node; - rose_node->next = NULL; - } - } else { - if (rose_tmpp == NULL) { /* 1st node */ - rose_node->next = rose_node_list; - rose_node_list = rose_node; - } else { - rose_tmpp->next = rose_node; - rose_node->next = rose_tmpn; - } - } - rose_neigh->count++; - rose_neigh_hold(rose_neigh); - - goto out; - } - - /* We have space, slot it in */ - if (rose_node->count < 3) { - rose_node->neighbour[rose_node->count] = rose_neigh; - rose_node->count++; - rose_neigh->count++; - rose_neigh_hold(rose_neigh); - } - -out: - spin_unlock_bh(&rose_neigh_list_lock); - spin_unlock_bh(&rose_node_list_lock); - - return res; -} - -/* - * Caller is holding rose_node_list_lock. 
- */ -static void rose_remove_node(struct rose_node *rose_node) -{ - struct rose_node *s; - - if ((s = rose_node_list) == rose_node) { - rose_node_list = rose_node->next; - kfree(rose_node); - return; - } - - while (s != NULL && s->next != NULL) { - if (s->next == rose_node) { - s->next = rose_node->next; - kfree(rose_node); - return; - } - - s = s->next; - } -} - -/* - * Caller is holding rose_neigh_list_lock. - */ -static void rose_remove_neigh(struct rose_neigh *rose_neigh) -{ - struct rose_neigh *s; - - timer_delete_sync(&rose_neigh->ftimer); - timer_delete_sync(&rose_neigh->t0timer); - - skb_queue_purge(&rose_neigh->queue); - - if ((s = rose_neigh_list) == rose_neigh) { - rose_neigh_list = rose_neigh->next; - return; - } - - while (s != NULL && s->next != NULL) { - if (s->next == rose_neigh) { - s->next = rose_neigh->next; - return; - } - - s = s->next; - } -} - -/* - * Caller is holding rose_route_list_lock. - */ -static void rose_remove_route(struct rose_route *rose_route) -{ - struct rose_route *s; - - if (rose_route->neigh1 != NULL) - rose_neigh_put(rose_route->neigh1); - - if (rose_route->neigh2 != NULL) - rose_neigh_put(rose_route->neigh2); - - if ((s = rose_route_list) == rose_route) { - rose_route_list = rose_route->next; - kfree(rose_route); - return; - } - - while (s != NULL && s->next != NULL) { - if (s->next == rose_route) { - s->next = rose_route->next; - kfree(rose_route); - return; - } - - s = s->next; - } -} - -/* - * "Delete" a node. Strictly speaking remove a route to a node. The node - * is only deleted if no routes are left to it. 
- */ -static int rose_del_node(struct rose_route_struct *rose_route, - struct net_device *dev) -{ - struct rose_node *rose_node; - struct rose_neigh *rose_neigh; - int i, err = 0; - - spin_lock_bh(&rose_node_list_lock); - spin_lock_bh(&rose_neigh_list_lock); - - rose_node = rose_node_list; - while (rose_node != NULL) { - if ((rose_node->mask == rose_route->mask) && - (rosecmpm(&rose_route->address, &rose_node->address, - rose_route->mask) == 0)) - break; - rose_node = rose_node->next; - } - - if (rose_node == NULL || rose_node->loopback) { - err = -EINVAL; - goto out; - } - - rose_neigh = rose_neigh_list; - while (rose_neigh != NULL) { - if (ax25cmp(&rose_route->neighbour, - &rose_neigh->callsign) == 0 && - rose_neigh->dev == dev) - break; - rose_neigh = rose_neigh->next; - } - - if (rose_neigh == NULL) { - err = -EINVAL; - goto out; - } - - for (i = 0; i < rose_node->count; i++) { - if (rose_node->neighbour[i] == rose_neigh) { - rose_neigh->count--; - rose_neigh_put(rose_neigh); - - if (rose_neigh->count == 0) { - rose_remove_neigh(rose_neigh); - rose_neigh_put(rose_neigh); - } - - rose_node->count--; - - if (rose_node->count == 0) { - rose_remove_node(rose_node); - } else { - switch (i) { - case 0: - rose_node->neighbour[0] = - rose_node->neighbour[1]; - fallthrough; - case 1: - rose_node->neighbour[1] = - rose_node->neighbour[2]; - break; - case 2: - break; - } - } - goto out; - } - } - err = -EINVAL; - -out: - spin_unlock_bh(&rose_neigh_list_lock); - spin_unlock_bh(&rose_node_list_lock); - - return err; -} - -/* - * Add the loopback neighbour. 
- */ -void rose_add_loopback_neigh(void) -{ - struct rose_neigh *sn; - - rose_loopback_neigh = kmalloc_obj(struct rose_neigh); - if (!rose_loopback_neigh) - return; - sn = rose_loopback_neigh; - - sn->callsign = null_ax25_address; - sn->digipeat = NULL; - sn->ax25 = NULL; - sn->dev = NULL; - sn->count = 0; - sn->dce_mode = 1; - sn->loopback = 1; - sn->number = rose_neigh_no++; - sn->restarted = 1; - refcount_set(&sn->use, 1); - - skb_queue_head_init(&sn->queue); - - timer_setup(&sn->ftimer, NULL, 0); - timer_setup(&sn->t0timer, NULL, 0); - - spin_lock_bh(&rose_neigh_list_lock); - sn->next = rose_neigh_list; - rose_neigh_list = sn; - spin_unlock_bh(&rose_neigh_list_lock); -} - -/* - * Add a loopback node. - */ -int rose_add_loopback_node(const rose_address *address) -{ - struct rose_node *rose_node; - int err = 0; - - spin_lock_bh(&rose_node_list_lock); - - rose_node = rose_node_list; - while (rose_node != NULL) { - if ((rose_node->mask == 10) && - (rosecmpm(address, &rose_node->address, 10) == 0) && - rose_node->loopback) - break; - rose_node = rose_node->next; - } - - if (rose_node != NULL) - goto out; - - if ((rose_node = kmalloc_obj(*rose_node, GFP_ATOMIC)) == NULL) { - err = -ENOMEM; - goto out; - } - - rose_node->address = *address; - rose_node->mask = 10; - rose_node->count = 1; - rose_node->loopback = 1; - rose_node->neighbour[0] = rose_loopback_neigh; - - /* Insert at the head of list. Address is always mask=10 */ - rose_node->next = rose_node_list; - rose_node_list = rose_node; - - rose_loopback_neigh->count++; - rose_neigh_hold(rose_loopback_neigh); - -out: - spin_unlock_bh(&rose_node_list_lock); - - return err; -} - -/* - * Delete a loopback node. 
- */ -void rose_del_loopback_node(const rose_address *address) -{ - struct rose_node *rose_node; - - spin_lock_bh(&rose_node_list_lock); - - rose_node = rose_node_list; - while (rose_node != NULL) { - if ((rose_node->mask == 10) && - (rosecmpm(address, &rose_node->address, 10) == 0) && - rose_node->loopback) - break; - rose_node = rose_node->next; - } - - if (rose_node == NULL) - goto out; - - rose_remove_node(rose_node); - - rose_loopback_neigh->count--; - rose_neigh_put(rose_loopback_neigh); - -out: - spin_unlock_bh(&rose_node_list_lock); -} - -/* - * A device has been removed. Remove its routes and neighbours. - */ -void rose_rt_device_down(struct net_device *dev) -{ - struct rose_neigh *s, *rose_neigh; - struct rose_node *t, *rose_node; - int i; - - spin_lock_bh(&rose_node_list_lock); - spin_lock_bh(&rose_neigh_list_lock); - rose_neigh = rose_neigh_list; - while (rose_neigh != NULL) { - s = rose_neigh; - rose_neigh = rose_neigh->next; - - if (s->dev != dev) - continue; - - rose_node = rose_node_list; - - while (rose_node != NULL) { - t = rose_node; - rose_node = rose_node->next; - - for (i = t->count - 1; i >= 0; i--) { - if (t->neighbour[i] != s) - continue; - - t->count--; - - memmove(&t->neighbour[i], &t->neighbour[i + 1], - sizeof(t->neighbour[0]) * - (t->count - i)); - rose_neigh_put(s); - } - - if (t->count <= 0) - rose_remove_node(t); - } - - rose_remove_neigh(s); - rose_neigh_put(s); - } - spin_unlock_bh(&rose_neigh_list_lock); - spin_unlock_bh(&rose_node_list_lock); -} - -#if 0 /* Currently unused */ -/* - * A device has been removed. Remove its links. 
- */ -void rose_route_device_down(struct net_device *dev) -{ - struct rose_route *s, *rose_route; - - spin_lock_bh(&rose_route_list_lock); - rose_route = rose_route_list; - while (rose_route != NULL) { - s = rose_route; - rose_route = rose_route->next; - - if (s->neigh1->dev == dev || s->neigh2->dev == dev) - rose_remove_route(s); - } - spin_unlock_bh(&rose_route_list_lock); -} -#endif - -/* - * Clear all nodes and neighbours out, except for neighbours with - * active connections going through them. - * Do not clear loopback neighbour and nodes. - */ -static int rose_clear_routes(void) -{ - struct rose_neigh *s, *rose_neigh; - struct rose_node *t, *rose_node; - int i; - - spin_lock_bh(&rose_node_list_lock); - spin_lock_bh(&rose_neigh_list_lock); - - rose_neigh = rose_neigh_list; - rose_node = rose_node_list; - - while (rose_node != NULL) { - t = rose_node; - rose_node = rose_node->next; - - if (!t->loopback) { - for (i = 0; i < t->count; i++) - rose_neigh_put(t->neighbour[i]); - rose_remove_node(t); - } - } - - while (rose_neigh != NULL) { - s = rose_neigh; - rose_neigh = rose_neigh->next; - - if (!s->loopback) { - rose_remove_neigh(s); - rose_neigh_put(s); - } - } - - spin_unlock_bh(&rose_neigh_list_lock); - spin_unlock_bh(&rose_node_list_lock); - - return 0; -} - -/* - * Check that the device given is a valid AX.25 interface that is "up". - * called with RTNL - */ -static struct net_device *rose_ax25_dev_find(char *devname) -{ - struct net_device *dev; - - if ((dev = __dev_get_by_name(&init_net, devname)) == NULL) - return NULL; - - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) - return dev; - - return NULL; -} - -/* - * Find the first active ROSE device, usually "rose0". 
- */ -struct net_device *rose_dev_first(void) -{ - struct net_device *dev, *first = NULL; - - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) - if (first == NULL || strncmp(dev->name, first->name, 3) < 0) - first = dev; - } - if (first) - dev_hold(first); - rcu_read_unlock(); - - return first; -} - -/* - * Find the ROSE device for the given address. - */ -struct net_device *rose_dev_get(rose_address *addr) -{ - struct net_device *dev; - - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && - rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { - dev_hold(dev); - goto out; - } - } - dev = NULL; -out: - rcu_read_unlock(); - return dev; -} - -static int rose_dev_exists(rose_address *addr) -{ - struct net_device *dev; - - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && - rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) - goto out; - } - dev = NULL; -out: - rcu_read_unlock(); - return dev != NULL; -} - - - - -struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neigh) -{ - struct rose_route *rose_route; - - for (rose_route = rose_route_list; rose_route != NULL; rose_route = rose_route->next) - if ((rose_route->neigh1 == neigh && rose_route->lci1 == lci) || - (rose_route->neigh2 == neigh && rose_route->lci2 == lci)) - return rose_route; - - return NULL; -} - -/* - * Find a neighbour or a route given a ROSE address. 
- */ -struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic, int route_frame) -{ - struct rose_neigh *res = NULL; - struct rose_node *node; - int failed = 0; - int i; - - if (!route_frame) spin_lock_bh(&rose_node_list_lock); - for (node = rose_node_list; node != NULL; node = node->next) { - if (rosecmpm(addr, &node->address, node->mask) == 0) { - for (i = 0; i < node->count; i++) { - if (node->neighbour[i]->restarted) { - res = node->neighbour[i]; - rose_neigh_hold(node->neighbour[i]); - goto out; - } - } - } - } - if (!route_frame) { /* connect request */ - for (node = rose_node_list; node != NULL; node = node->next) { - if (rosecmpm(addr, &node->address, node->mask) == 0) { - for (i = 0; i < node->count; i++) { - if (!rose_ftimer_running(node->neighbour[i])) { - res = node->neighbour[i]; - rose_neigh_hold(node->neighbour[i]); - goto out; - } - failed = 1; - } - } - } - } - - if (failed) { - *cause = ROSE_OUT_OF_ORDER; - *diagnostic = 0; - } else { - *cause = ROSE_NOT_OBTAINABLE; - *diagnostic = 0; - } - -out: - if (!route_frame) spin_unlock_bh(&rose_node_list_lock); - return res; -} - -/* - * Handle the ioctls that control the routing functions. 
- */ -int rose_rt_ioctl(unsigned int cmd, void __user *arg) -{ - struct rose_route_struct rose_route; - struct net_device *dev; - int err; - - switch (cmd) { - case SIOCADDRT: - if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) - return -EFAULT; - if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) - return -EINVAL; - if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */ - return -EINVAL; - if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ - return -EINVAL; - if (rose_route.ndigis > AX25_MAX_DIGIS) - return -EINVAL; - err = rose_add_node(&rose_route, dev); - return err; - - case SIOCDELRT: - if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) - return -EFAULT; - if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) - return -EINVAL; - err = rose_del_node(&rose_route, dev); - return err; - - case SIOCRSCLRRT: - return rose_clear_routes(); - - default: - return -EINVAL; - } - - return 0; -} - -static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh) -{ - struct rose_route *rose_route, *s; - - rose_neigh->restarted = 0; - - rose_stop_t0timer(rose_neigh); - rose_start_ftimer(rose_neigh); - - skb_queue_purge(&rose_neigh->queue); - - spin_lock_bh(&rose_route_list_lock); - - rose_route = rose_route_list; - - while (rose_route != NULL) { - if ((rose_route->neigh1 == rose_neigh && rose_route->neigh2 == rose_neigh) || - (rose_route->neigh1 == rose_neigh && rose_route->neigh2 == NULL) || - (rose_route->neigh2 == rose_neigh && rose_route->neigh1 == NULL)) { - s = rose_route->next; - rose_remove_route(rose_route); - rose_route = s; - continue; - } - - if (rose_route->neigh1 == rose_neigh) { - rose_neigh_put(rose_route->neigh1); - rose_route->neigh1 = NULL; - rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0); - } - - if (rose_route->neigh2 == rose_neigh) { - rose_neigh_put(rose_route->neigh2); - rose_route->neigh2 = NULL; - 
rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0); - } - - rose_route = rose_route->next; - } - spin_unlock_bh(&rose_route_list_lock); -} - -/* - * A level 2 link has timed out, therefore it appears to be a poor link, - * then don't use that neighbour until it is reset. Blow away all through - * routes and connections using this route. - */ -void rose_link_failed(ax25_cb *ax25, int reason) -{ - struct rose_neigh *rose_neigh; - - spin_lock_bh(&rose_neigh_list_lock); - rose_neigh = rose_neigh_list; - while (rose_neigh != NULL) { - if (rose_neigh->ax25 == ax25) - break; - rose_neigh = rose_neigh->next; - } - - if (rose_neigh != NULL) { - rose_neigh->ax25 = NULL; - ax25_cb_put(ax25); - - rose_del_route_by_neigh(rose_neigh); - rose_kill_by_neigh(rose_neigh); - } - spin_unlock_bh(&rose_neigh_list_lock); -} - -/* - * A device has been "downed" remove its link status. Blow away all - * through routes and connections that use this device. - */ -void rose_link_device_down(struct net_device *dev) -{ - struct rose_neigh *rose_neigh; - - for (rose_neigh = rose_neigh_list; rose_neigh != NULL; rose_neigh = rose_neigh->next) { - if (rose_neigh->dev == dev) { - rose_del_route_by_neigh(rose_neigh); - rose_kill_by_neigh(rose_neigh); - } - } -} - -/* - * Route a frame to an appropriate AX.25 connection. - * A NULL ax25_cb indicates an internally generated frame. 
- */ -int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) -{ - struct rose_neigh *rose_neigh, *new_neigh; - struct rose_route *rose_route; - struct rose_facilities_struct facilities; - rose_address *src_addr, *dest_addr; - struct sock *sk; - unsigned short frametype; - unsigned int lci, new_lci; - unsigned char cause, diagnostic; - struct net_device *dev; - int res = 0; - char buf[11]; - - if (skb->len < ROSE_MIN_LEN) - return res; - - if (!ax25) - return rose_loopback_queue(skb, NULL); - - frametype = skb->data[2]; - lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); - if (frametype == ROSE_CALL_REQUEST && - (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || - skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != - ROSE_CALL_REQ_ADDR_LEN_VAL)) - return res; - src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); - dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); - - spin_lock_bh(&rose_neigh_list_lock); - spin_lock_bh(&rose_route_list_lock); - - rose_neigh = rose_neigh_list; - while (rose_neigh != NULL) { - if (ax25cmp(&ax25->dest_addr, &rose_neigh->callsign) == 0 && - ax25->ax25_dev->dev == rose_neigh->dev) - break; - rose_neigh = rose_neigh->next; - } - - if (rose_neigh == NULL) { - printk("rose_route : unknown neighbour or device %s\n", - ax2asc(buf, &ax25->dest_addr)); - goto out; - } - - /* - * Obviously the link is working, halt the ftimer. - */ - rose_stop_ftimer(rose_neigh); - - /* - * LCI of zero is always for us, and its always a restart - * frame. - */ - if (lci == 0) { - rose_link_rx_restart(skb, rose_neigh, frametype); - goto out; - } - - /* - * Find an existing socket. 
- */ - if ((sk = rose_find_socket(lci, rose_neigh)) != NULL) { - if (frametype == ROSE_CALL_REQUEST) { - struct rose_sock *rose = rose_sk(sk); - - /* Remove an existing unused socket */ - rose_clear_queues(sk); - rose->cause = ROSE_NETWORK_CONGESTION; - rose->diagnostic = 0; - rose_neigh_put(rose->neighbour); - rose->neighbour = NULL; - rose->lci = 0; - rose->state = ROSE_STATE_0; - sk->sk_state = TCP_CLOSE; - sk->sk_err = 0; - sk->sk_shutdown |= SEND_SHUTDOWN; - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - } - } - else { - skb_reset_transport_header(skb); - res = rose_process_rx_frame(sk, skb); - goto out; - } - } - - /* - * Is is a Call Request and is it for us ? - */ - if (frametype == ROSE_CALL_REQUEST) - if ((dev = rose_dev_get(dest_addr)) != NULL) { - res = rose_rx_call_request(skb, dev, rose_neigh, lci); - dev_put(dev); - goto out; - } - - if (!sysctl_rose_routing_control) { - rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 0); - goto out; - } - - /* - * Route it to the next in line if we have an entry for it. 
- */ - rose_route = rose_route_list; - while (rose_route != NULL) { - if (rose_route->lci1 == lci && - rose_route->neigh1 == rose_neigh) { - if (frametype == ROSE_CALL_REQUEST) { - /* F6FBB - Remove an existing unused route */ - rose_remove_route(rose_route); - break; - } else if (rose_route->neigh2 != NULL) { - skb->data[0] &= 0xF0; - skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; - skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; - rose_transmit_link(skb, rose_route->neigh2); - if (frametype == ROSE_CLEAR_CONFIRMATION) - rose_remove_route(rose_route); - res = 1; - goto out; - } else { - if (frametype == ROSE_CLEAR_CONFIRMATION) - rose_remove_route(rose_route); - goto out; - } - } - if (rose_route->lci2 == lci && - rose_route->neigh2 == rose_neigh) { - if (frametype == ROSE_CALL_REQUEST) { - /* F6FBB - Remove an existing unused route */ - rose_remove_route(rose_route); - break; - } else if (rose_route->neigh1 != NULL) { - skb->data[0] &= 0xF0; - skb->data[0] |= (rose_route->lci1 >> 8) & 0x0F; - skb->data[1] = (rose_route->lci1 >> 0) & 0xFF; - rose_transmit_link(skb, rose_route->neigh1); - if (frametype == ROSE_CLEAR_CONFIRMATION) - rose_remove_route(rose_route); - res = 1; - goto out; - } else { - if (frametype == ROSE_CLEAR_CONFIRMATION) - rose_remove_route(rose_route); - goto out; - } - } - rose_route = rose_route->next; - } - - /* - * We know that: - * 1. The frame isn't for us, - * 2. It isn't "owned" by any existing route. - */ - if (frametype != ROSE_CALL_REQUEST) { /* XXX */ - res = 0; - goto out; - } - - memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - - if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, - skb->len - ROSE_CALL_REQ_FACILITIES_OFF, - &facilities)) { - rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); - goto out; - } - - /* - * Check for routing loops. 
- */ - rose_route = rose_route_list; - while (rose_route != NULL) { - if (rose_route->rand == facilities.rand && - rosecmp(src_addr, &rose_route->src_addr) == 0 && - ax25cmp(&facilities.dest_call, &rose_route->src_call) == 0 && - ax25cmp(&facilities.source_call, &rose_route->dest_call) == 0) { - rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 120); - goto out; - } - rose_route = rose_route->next; - } - - if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) { - rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic); - goto out; - } - - if ((new_lci = rose_new_lci(new_neigh)) == 0) { - rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71); - goto put_neigh; - } - - if ((rose_route = kmalloc_obj(*rose_route, GFP_ATOMIC)) == NULL) { - rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120); - goto put_neigh; - } - - rose_route->lci1 = lci; - rose_route->src_addr = *src_addr; - rose_route->dest_addr = *dest_addr; - rose_route->src_call = facilities.dest_call; - rose_route->dest_call = facilities.source_call; - rose_route->rand = facilities.rand; - rose_route->neigh1 = rose_neigh; - rose_route->lci2 = new_lci; - rose_route->neigh2 = new_neigh; - - rose_neigh_hold(rose_route->neigh1); - rose_neigh_hold(rose_route->neigh2); - - rose_route->next = rose_route_list; - rose_route_list = rose_route; - - skb->data[0] &= 0xF0; - skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; - skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; - - rose_transmit_link(skb, rose_route->neigh2); - res = 1; - -put_neigh: - rose_neigh_put(new_neigh); -out: - spin_unlock_bh(&rose_route_list_lock); - spin_unlock_bh(&rose_neigh_list_lock); - - return res; -} - -#ifdef CONFIG_PROC_FS - -static void *rose_node_start(struct seq_file *seq, loff_t *pos) - __acquires(rose_node_list_lock) -{ - struct rose_node *rose_node; - int i = 1; - - spin_lock_bh(&rose_node_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - for 
(rose_node = rose_node_list; rose_node && i < *pos; - rose_node = rose_node->next, ++i); - - return (i == *pos) ? rose_node : NULL; -} - -static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return (v == SEQ_START_TOKEN) ? rose_node_list - : ((struct rose_node *)v)->next; -} - -static void rose_node_stop(struct seq_file *seq, void *v) - __releases(rose_node_list_lock) -{ - spin_unlock_bh(&rose_node_list_lock); -} - -static int rose_node_show(struct seq_file *seq, void *v) -{ - char rsbuf[11]; - int i; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "address mask n neigh neigh neigh\n"); - else { - const struct rose_node *rose_node = v; - seq_printf(seq, "%-10s %04d %d", - rose2asc(rsbuf, &rose_node->address), - rose_node->mask, - rose_node->count); - - for (i = 0; i < rose_node->count; i++) - seq_printf(seq, " %05d", rose_node->neighbour[i]->number); - - seq_puts(seq, "\n"); - } - return 0; -} - -const struct seq_operations rose_node_seqops = { - .start = rose_node_start, - .next = rose_node_next, - .stop = rose_node_stop, - .show = rose_node_show, -}; - -static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) - __acquires(rose_neigh_list_lock) -{ - struct rose_neigh *rose_neigh; - int i = 1; - - spin_lock_bh(&rose_neigh_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - for (rose_neigh = rose_neigh_list; rose_neigh && i < *pos; - rose_neigh = rose_neigh->next, ++i); - - return (i == *pos) ? rose_neigh : NULL; -} - -static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return (v == SEQ_START_TOKEN) ? 
rose_neigh_list - : ((struct rose_neigh *)v)->next; -} - -static void rose_neigh_stop(struct seq_file *seq, void *v) - __releases(rose_neigh_list_lock) -{ - spin_unlock_bh(&rose_neigh_list_lock); -} - -static int rose_neigh_show(struct seq_file *seq, void *v) -{ - char buf[11]; - int i; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, - "addr callsign dev count use mode restart t0 tf digipeaters\n"); - else { - struct rose_neigh *rose_neigh = v; - - /* if (!rose_neigh->loopback) { */ - seq_printf(seq, "%05d %-9s %-4s %3d %3d %3s %3s %3lu %3lu", - rose_neigh->number, - (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), - rose_neigh->dev ? rose_neigh->dev->name : "???", - rose_neigh->count, - refcount_read(&rose_neigh->use) - rose_neigh->count - 1, - (rose_neigh->dce_mode) ? "DCE" : "DTE", - (rose_neigh->restarted) ? "yes" : "no", - ax25_display_timer(&rose_neigh->t0timer) / HZ, - ax25_display_timer(&rose_neigh->ftimer) / HZ); - - if (rose_neigh->digipeat != NULL) { - for (i = 0; i < rose_neigh->digipeat->ndigi; i++) - seq_printf(seq, " %s", ax2asc(buf, &rose_neigh->digipeat->calls[i])); - } - - seq_puts(seq, "\n"); - } - return 0; -} - - -const struct seq_operations rose_neigh_seqops = { - .start = rose_neigh_start, - .next = rose_neigh_next, - .stop = rose_neigh_stop, - .show = rose_neigh_show, -}; - -static void *rose_route_start(struct seq_file *seq, loff_t *pos) - __acquires(rose_route_list_lock) -{ - struct rose_route *rose_route; - int i = 1; - - spin_lock_bh(&rose_route_list_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - for (rose_route = rose_route_list; rose_route && i < *pos; - rose_route = rose_route->next, ++i); - - return (i == *pos) ? rose_route : NULL; -} - -static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return (v == SEQ_START_TOKEN) ? 
rose_route_list - : ((struct rose_route *)v)->next; -} - -static void rose_route_stop(struct seq_file *seq, void *v) - __releases(rose_route_list_lock) -{ - spin_unlock_bh(&rose_route_list_lock); -} - -static int rose_route_show(struct seq_file *seq, void *v) -{ - char buf[11], rsbuf[11]; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, - "lci address callsign neigh <-> lci address callsign neigh\n"); - else { - struct rose_route *rose_route = v; - - if (rose_route->neigh1) - seq_printf(seq, - "%3.3X %-10s %-9s %05d ", - rose_route->lci1, - rose2asc(rsbuf, &rose_route->src_addr), - ax2asc(buf, &rose_route->src_call), - rose_route->neigh1->number); - else - seq_puts(seq, - "000 * * 00000 "); - - if (rose_route->neigh2) - seq_printf(seq, - "%3.3X %-10s %-9s %05d\n", - rose_route->lci2, - rose2asc(rsbuf, &rose_route->dest_addr), - ax2asc(buf, &rose_route->dest_call), - rose_route->neigh2->number); - else - seq_puts(seq, - "000 * * 00000\n"); - } - return 0; -} - -struct seq_operations rose_route_seqops = { - .start = rose_route_start, - .next = rose_route_next, - .stop = rose_route_stop, - .show = rose_route_show, -}; -#endif /* CONFIG_PROC_FS */ - -/* - * Release all memory associated with ROSE routing structures. 
- */ -void __exit rose_rt_free(void) -{ - struct rose_neigh *s, *rose_neigh = rose_neigh_list; - struct rose_node *t, *rose_node = rose_node_list; - struct rose_route *u, *rose_route = rose_route_list; - int i; - - while (rose_neigh != NULL) { - s = rose_neigh; - rose_neigh = rose_neigh->next; - - rose_remove_neigh(s); - rose_neigh_put(s); - } - - while (rose_node != NULL) { - t = rose_node; - rose_node = rose_node->next; - - for (i = 0; i < t->count; i++) - rose_neigh_put(t->neighbour[i]); - rose_remove_node(t); - } - - while (rose_route != NULL) { - u = rose_route; - rose_route = rose_route->next; - - rose_remove_route(u); - } -} diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c deleted file mode 100644 index 4dbc437a9e22..000000000000 --- a/net/rose/rose_subr.c +++ /dev/null @@ -1,556 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/slab.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/rose.h> - -static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose); - -/* - * This routine purges all of the queues of frames. - */ -void rose_clear_queues(struct sock *sk) -{ - skb_queue_purge(&sk->sk_write_queue); - skb_queue_purge(&rose_sk(sk)->ack_queue); -} - -/* - * This routine purges the input queue of those frames that have been - * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the - * SDL diagram. 
- */ -void rose_frames_acked(struct sock *sk, unsigned short nr) -{ - struct sk_buff *skb; - struct rose_sock *rose = rose_sk(sk); - - /* - * Remove all the ack-ed frames from the ack queue. - */ - if (rose->va != nr) { - while (skb_peek(&rose->ack_queue) != NULL && rose->va != nr) { - skb = skb_dequeue(&rose->ack_queue); - kfree_skb(skb); - rose->va = (rose->va + 1) % ROSE_MODULUS; - } - } -} - -void rose_requeue_frames(struct sock *sk) -{ - struct sk_buff *skb, *skb_prev = NULL; - - /* - * Requeue all the un-ack-ed frames on the output queue to be picked - * up by rose_kick. This arrangement handles the possibility of an - * empty output queue. - */ - while ((skb = skb_dequeue(&rose_sk(sk)->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&sk->sk_write_queue, skb); - else - skb_append(skb_prev, skb, &sk->sk_write_queue); - skb_prev = skb; - } -} - -/* - * Validate that the value of nr is between va and vs. Return true or - * false for testing. - */ -int rose_validate_nr(struct sock *sk, unsigned short nr) -{ - struct rose_sock *rose = rose_sk(sk); - unsigned short vc = rose->va; - - while (vc != rose->vs) { - if (nr == vc) return 1; - vc = (vc + 1) % ROSE_MODULUS; - } - - return nr == rose->vs; -} - -/* - * This routine is called when the packet layer internally generates a - * control frame. 
- */ -void rose_write_internal(struct sock *sk, int frametype) -{ - struct rose_sock *rose = rose_sk(sk); - struct sk_buff *skb; - unsigned char *dptr; - unsigned char lci1, lci2; - int maxfaclen = 0; - int len, faclen; - int reserve; - - reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; - len = ROSE_MIN_LEN; - - switch (frametype) { - case ROSE_CALL_REQUEST: - len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - maxfaclen = 256; - break; - case ROSE_CALL_ACCEPTED: - case ROSE_CLEAR_REQUEST: - case ROSE_RESET_REQUEST: - len += 2; - break; - } - - skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); - if (!skb) - return; - - /* - * Space for AX.25 header and PID. - */ - skb_reserve(skb, reserve); - - dptr = skb_put(skb, len); - - lci1 = (rose->lci >> 8) & 0x0F; - lci2 = (rose->lci >> 0) & 0xFF; - - switch (frametype) { - case ROSE_CALL_REQUEST: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr++ = frametype; - *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL; - memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); - dptr += ROSE_ADDR_LEN; - memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); - dptr += ROSE_ADDR_LEN; - faclen = rose_create_facilities(dptr, rose); - skb_put(skb, faclen); - dptr += faclen; - break; - - case ROSE_CALL_ACCEPTED: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr++ = frametype; - *dptr++ = 0x00; /* Address length */ - *dptr++ = 0; /* Facilities length */ - break; - - case ROSE_CLEAR_REQUEST: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr++ = frametype; - *dptr++ = rose->cause; - *dptr++ = rose->diagnostic; - break; - - case ROSE_RESET_REQUEST: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr++ = frametype; - *dptr++ = ROSE_DTE_ORIGINATED; - *dptr++ = 0; - break; - - case ROSE_RR: - case ROSE_RNR: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr = frametype; - *dptr++ |= (rose->vr << 5) & 0xE0; - break; - - case ROSE_CLEAR_CONFIRMATION: - case ROSE_RESET_CONFIRMATION: - *dptr++ = ROSE_GFI | lci1; - *dptr++ = lci2; - *dptr++ = 
frametype; - break; - - default: - printk(KERN_ERR "ROSE: rose_write_internal - invalid frametype %02X\n", frametype); - kfree_skb(skb); - return; - } - - rose_transmit_link(skb, rose->neighbour); -} - -int rose_decode(struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m) -{ - unsigned char *frame; - - frame = skb->data; - - *ns = *nr = *q = *d = *m = 0; - - switch (frame[2]) { - case ROSE_CALL_REQUEST: - case ROSE_CALL_ACCEPTED: - case ROSE_CLEAR_REQUEST: - case ROSE_CLEAR_CONFIRMATION: - case ROSE_RESET_REQUEST: - case ROSE_RESET_CONFIRMATION: - return frame[2]; - default: - break; - } - - if ((frame[2] & 0x1F) == ROSE_RR || - (frame[2] & 0x1F) == ROSE_RNR) { - *nr = (frame[2] >> 5) & 0x07; - return frame[2] & 0x1F; - } - - if ((frame[2] & 0x01) == ROSE_DATA) { - *q = (frame[0] & ROSE_Q_BIT) == ROSE_Q_BIT; - *d = (frame[0] & ROSE_D_BIT) == ROSE_D_BIT; - *m = (frame[2] & ROSE_M_BIT) == ROSE_M_BIT; - *nr = (frame[2] >> 5) & 0x07; - *ns = (frame[2] >> 1) & 0x07; - return ROSE_DATA; - } - - return ROSE_ILLEGAL; -} - -static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *facilities, int len) -{ - unsigned char *pt; - unsigned char l, lg, n = 0; - int fac_national_digis_received = 0; - - do { - switch (*p & 0xC0) { - case 0x00: - if (len < 2) - return -1; - p += 2; - n += 2; - len -= 2; - break; - - case 0x40: - if (len < 3) - return -1; - if (*p == FAC_NATIONAL_RAND) - facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); - p += 3; - n += 3; - len -= 3; - break; - - case 0x80: - if (len < 4) - return -1; - p += 4; - n += 4; - len -= 4; - break; - - case 0xC0: - if (len < 2) - return -1; - l = p[1]; - if (len < 2 + l) - return -1; - if (*p == FAC_NATIONAL_DEST_DIGI) { - if (!fac_national_digis_received) { - if (l < AX25_ADDR_LEN) - return -1; - memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); - facilities->source_ndigis = 1; - } - } - else if (*p == FAC_NATIONAL_SRC_DIGI) { - if (!fac_national_digis_received) 
{ - if (l < AX25_ADDR_LEN) - return -1; - memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); - facilities->dest_ndigis = 1; - } - } - else if (*p == FAC_NATIONAL_FAIL_CALL) { - if (l < AX25_ADDR_LEN) - return -1; - memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); - } - else if (*p == FAC_NATIONAL_FAIL_ADD) { - if (l < 1 + ROSE_ADDR_LEN) - return -1; - memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); - } - else if (*p == FAC_NATIONAL_DIGIS) { - if (l % AX25_ADDR_LEN) - return -1; - fac_national_digis_received = 1; - facilities->source_ndigis = 0; - facilities->dest_ndigis = 0; - for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) { - if (pt[6] & AX25_HBIT) { - if (facilities->dest_ndigis >= ROSE_MAX_DIGIS) - return -1; - memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN); - } else { - if (facilities->source_ndigis >= ROSE_MAX_DIGIS) - return -1; - memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN); - } - } - } - p += l + 2; - n += l + 2; - len -= l + 2; - break; - } - } while (*p != 0x00 && len > 0); - - return n; -} - -static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *facilities, int len) -{ - unsigned char l, n = 0; - char callsign[11]; - - do { - switch (*p & 0xC0) { - case 0x00: - if (len < 2) - return -1; - p += 2; - n += 2; - len -= 2; - break; - - case 0x40: - if (len < 3) - return -1; - p += 3; - n += 3; - len -= 3; - break; - - case 0x80: - if (len < 4) - return -1; - p += 4; - n += 4; - len -= 4; - break; - - case 0xC0: - if (len < 2) - return -1; - l = p[1]; - - /* Prevent overflows*/ - if (l < 10 || l > 20) - return -1; - - if (*p == FAC_CCITT_DEST_NSAP) { - memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); - memcpy(callsign, p + 12, l - 10); - callsign[l - 10] = '\0'; - asc2ax(&facilities->source_call, callsign); - } - if (*p == FAC_CCITT_SRC_NSAP) { - memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN); - 
memcpy(callsign, p + 12, l - 10); - callsign[l - 10] = '\0'; - asc2ax(&facilities->dest_call, callsign); - } - p += l + 2; - n += l + 2; - len -= l + 2; - break; - } - } while (*p != 0x00 && len > 0); - - return n; -} - -int rose_parse_facilities(unsigned char *p, unsigned packet_len, - struct rose_facilities_struct *facilities) -{ - int facilities_len, len; - - facilities_len = *p++; - - if (facilities_len == 0 || (unsigned int)facilities_len > packet_len) - return 0; - - while (facilities_len >= 3 && *p == 0x00) { - facilities_len--; - p++; - - switch (*p) { - case FAC_NATIONAL: /* National */ - len = rose_parse_national(p + 1, facilities, facilities_len - 1); - break; - - case FAC_CCITT: /* CCITT */ - len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); - break; - - default: - printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); - len = 1; - break; - } - - if (len < 0) - return 0; - if (WARN_ON(len >= facilities_len)) - return 0; - facilities_len -= len + 1; - p += len + 1; - } - - return facilities_len == 0; -} - -static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) -{ - unsigned char *p = buffer + 1; - char *callsign; - char buf[11]; - int len, nb; - - /* National Facilities */ - if (rose->rand != 0 || rose->source_ndigis == 1 || rose->dest_ndigis == 1) { - *p++ = 0x00; - *p++ = FAC_NATIONAL; - - if (rose->rand != 0) { - *p++ = FAC_NATIONAL_RAND; - *p++ = (rose->rand >> 8) & 0xFF; - *p++ = (rose->rand >> 0) & 0xFF; - } - - /* Sent before older facilities */ - if ((rose->source_ndigis > 0) || (rose->dest_ndigis > 0)) { - int maxdigi = 0; - *p++ = FAC_NATIONAL_DIGIS; - *p++ = AX25_ADDR_LEN * (rose->source_ndigis + rose->dest_ndigis); - for (nb = 0 ; nb < rose->source_ndigis ; nb++) { - if (++maxdigi >= ROSE_MAX_DIGIS) - break; - memcpy(p, &rose->source_digis[nb], AX25_ADDR_LEN); - p[6] |= AX25_HBIT; - p += AX25_ADDR_LEN; - } - for (nb = 0 ; nb < rose->dest_ndigis ; nb++) { - if (++maxdigi 
>= ROSE_MAX_DIGIS) - break; - memcpy(p, &rose->dest_digis[nb], AX25_ADDR_LEN); - p[6] &= ~AX25_HBIT; - p += AX25_ADDR_LEN; - } - } - - /* For compatibility */ - if (rose->source_ndigis > 0) { - *p++ = FAC_NATIONAL_SRC_DIGI; - *p++ = AX25_ADDR_LEN; - memcpy(p, &rose->source_digis[0], AX25_ADDR_LEN); - p += AX25_ADDR_LEN; - } - - /* For compatibility */ - if (rose->dest_ndigis > 0) { - *p++ = FAC_NATIONAL_DEST_DIGI; - *p++ = AX25_ADDR_LEN; - memcpy(p, &rose->dest_digis[0], AX25_ADDR_LEN); - p += AX25_ADDR_LEN; - } - } - - *p++ = 0x00; - *p++ = FAC_CCITT; - - *p++ = FAC_CCITT_DEST_NSAP; - - callsign = ax2asc(buf, &rose->dest_call); - - *p++ = strlen(callsign) + 10; - *p++ = (strlen(callsign) + 9) * 2; /* ??? */ - - *p++ = 0x47; *p++ = 0x00; *p++ = 0x11; - *p++ = ROSE_ADDR_LEN * 2; - memcpy(p, &rose->dest_addr, ROSE_ADDR_LEN); - p += ROSE_ADDR_LEN; - - memcpy(p, callsign, strlen(callsign)); - p += strlen(callsign); - - *p++ = FAC_CCITT_SRC_NSAP; - - callsign = ax2asc(buf, &rose->source_call); - - *p++ = strlen(callsign) + 10; - *p++ = (strlen(callsign) + 9) * 2; /* ??? 
*/ - - *p++ = 0x47; *p++ = 0x00; *p++ = 0x11; - *p++ = ROSE_ADDR_LEN * 2; - memcpy(p, &rose->source_addr, ROSE_ADDR_LEN); - p += ROSE_ADDR_LEN; - - memcpy(p, callsign, strlen(callsign)); - p += strlen(callsign); - - len = p - buffer; - buffer[0] = len - 1; - - return len; -} - -void rose_disconnect(struct sock *sk, int reason, int cause, int diagnostic) -{ - struct rose_sock *rose = rose_sk(sk); - - rose_stop_timer(sk); - rose_stop_idletimer(sk); - - rose_clear_queues(sk); - - rose->lci = 0; - rose->state = ROSE_STATE_0; - - if (cause != -1) - rose->cause = cause; - - if (diagnostic != -1) - rose->diagnostic = diagnostic; - - sk->sk_state = TCP_CLOSE; - sk->sk_err = reason; - sk->sk_shutdown |= SEND_SHUTDOWN; - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - } -} diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c deleted file mode 100644 index bb60a1654d61..000000000000 --- a/net/rose/rose_timer.c +++ /dev/null @@ -1,227 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) - * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) - */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <net/ax25.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <net/rose.h> - -static void rose_heartbeat_expiry(struct timer_list *t); -static void rose_timer_expiry(struct timer_list *); -static void rose_idletimer_expiry(struct timer_list *); - -void rose_start_heartbeat(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); - - sk->sk_timer.function = 
rose_heartbeat_expiry; - sk->sk_timer.expires = jiffies + 5 * HZ; - - sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); -} - -void rose_start_t1timer(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - sk_stop_timer(sk, &rose->timer); - - rose->timer.function = rose_timer_expiry; - rose->timer.expires = jiffies + rose->t1; - - sk_reset_timer(sk, &rose->timer, rose->timer.expires); -} - -void rose_start_t2timer(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - sk_stop_timer(sk, &rose->timer); - - rose->timer.function = rose_timer_expiry; - rose->timer.expires = jiffies + rose->t2; - - sk_reset_timer(sk, &rose->timer, rose->timer.expires); -} - -void rose_start_t3timer(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - sk_stop_timer(sk, &rose->timer); - - rose->timer.function = rose_timer_expiry; - rose->timer.expires = jiffies + rose->t3; - - sk_reset_timer(sk, &rose->timer, rose->timer.expires); -} - -void rose_start_hbtimer(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - sk_stop_timer(sk, &rose->timer); - - rose->timer.function = rose_timer_expiry; - rose->timer.expires = jiffies + rose->hb; - - sk_reset_timer(sk, &rose->timer, rose->timer.expires); -} - -void rose_start_idletimer(struct sock *sk) -{ - struct rose_sock *rose = rose_sk(sk); - - sk_stop_timer(sk, &rose->idletimer); - - if (rose->idle > 0) { - rose->idletimer.function = rose_idletimer_expiry; - rose->idletimer.expires = jiffies + rose->idle; - - sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); - } -} - -void rose_stop_heartbeat(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -void rose_stop_timer(struct sock *sk) -{ - sk_stop_timer(sk, &rose_sk(sk)->timer); -} - -void rose_stop_idletimer(struct sock *sk) -{ - sk_stop_timer(sk, &rose_sk(sk)->idletimer); -} - -static void rose_heartbeat_expiry(struct timer_list *t) -{ - struct sock *sk = timer_container_of(sk, t, sk_timer); - struct rose_sock *rose = rose_sk(sk); - 
- bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20); - goto out; - } - switch (rose->state) { - case ROSE_STATE_0: - /* Magic here: If we listen() and a new link dies before it - is accepted() it isn't 'dead' so doesn't get removed. */ - if (sock_flag(sk, SOCK_DESTROY) || - (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - bh_unlock_sock(sk); - rose_destroy_socket(sk); - sock_put(sk); - return; - } - break; - - case ROSE_STATE_3: - /* - * Check for the state of the receive buffer. - */ - if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && - (rose->condition & ROSE_COND_OWN_RX_BUSY)) { - rose->condition &= ~ROSE_COND_OWN_RX_BUSY; - rose->condition &= ~ROSE_COND_ACK_PENDING; - rose->vl = rose->vr; - rose_write_internal(sk, ROSE_RR); - rose_stop_timer(sk); /* HB */ - break; - } - break; - } - - rose_start_heartbeat(sk); -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -static void rose_timer_expiry(struct timer_list *t) -{ - struct rose_sock *rose = timer_container_of(rose, t, timer); - struct sock *sk = &rose->sock; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &rose->timer, jiffies + HZ/20); - goto out; - } - switch (rose->state) { - case ROSE_STATE_1: /* T1 */ - case ROSE_STATE_4: /* T2 */ - rose_write_internal(sk, ROSE_CLEAR_REQUEST); - rose->state = ROSE_STATE_2; - rose_start_t3timer(sk); - break; - - case ROSE_STATE_2: /* T3 */ - rose_neigh_put(rose->neighbour); - rose_disconnect(sk, ETIMEDOUT, -1, -1); - break; - - case ROSE_STATE_3: /* HB */ - if (rose->condition & ROSE_COND_ACK_PENDING) { - rose->condition &= ~ROSE_COND_ACK_PENDING; - rose_enquiry_response(sk); - } - break; - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -static void rose_idletimer_expiry(struct timer_list *t) -{ - struct rose_sock *rose = timer_container_of(rose, t, idletimer); - struct sock *sk = &rose->sock; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, 
&rose->idletimer, jiffies + HZ/20); - goto out; - } - rose_clear_queues(sk); - - rose_write_internal(sk, ROSE_CLEAR_REQUEST); - rose_sk(sk)->state = ROSE_STATE_2; - - rose_start_t3timer(sk); - - sk->sk_state = TCP_CLOSE; - sk->sk_err = 0; - sk->sk_shutdown |= SEND_SHUTDOWN; - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sock_set_flag(sk, SOCK_DEAD); - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c deleted file mode 100644 index d801315b7083..000000000000 --- a/net/rose/sysctl_net_rose.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) - */ -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/init.h> -#include <net/ax25.h> -#include <net/rose.h> - -static int min_timer[] = {1 * HZ}; -static int max_timer[] = {300 * HZ}; -static int min_idle[] = {0 * HZ}; -static int max_idle[] = {65535 * HZ}; -static int min_route[1], max_route[] = {1}; -static int min_ftimer[] = {60 * HZ}; -static int max_ftimer[] = {600 * HZ}; -static int min_maxvcs[] = {1}, max_maxvcs[] = {254}; -static int min_window[] = {1}, max_window[] = {7}; - -static struct ctl_table_header *rose_table_header; - -static struct ctl_table rose_table[] = { - { - .procname = "restart_request_timeout", - .data = &sysctl_rose_restart_request_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_timer, - .extra2 = &max_timer - }, - { - .procname = "call_request_timeout", - .data = &sysctl_rose_call_request_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_timer, - .extra2 = &max_timer - }, - { - .procname = "reset_request_timeout", - .data = &sysctl_rose_reset_request_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_timer, - .extra2 = &max_timer - }, 
- { - .procname = "clear_request_timeout", - .data = &sysctl_rose_clear_request_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_timer, - .extra2 = &max_timer - }, - { - .procname = "no_activity_timeout", - .data = &sysctl_rose_no_activity_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_idle, - .extra2 = &max_idle - }, - { - .procname = "acknowledge_hold_back_timeout", - .data = &sysctl_rose_ack_hold_back_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_timer, - .extra2 = &max_timer - }, - { - .procname = "routing_control", - .data = &sysctl_rose_routing_control, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_route, - .extra2 = &max_route - }, - { - .procname = "link_fail_timeout", - .data = &sysctl_rose_link_fail_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_ftimer, - .extra2 = &max_ftimer - }, - { - .procname = "maximum_virtual_circuits", - .data = &sysctl_rose_maximum_vcs, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_maxvcs, - .extra2 = &max_maxvcs - }, - { - .procname = "window_size", - .data = &sysctl_rose_window_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_window, - .extra2 = &max_window - }, -}; - -void __init rose_register_sysctl(void) -{ - rose_table_header = register_net_sysctl(&init_net, "net/rose", rose_table); -} - -void rose_unregister_sysctl(void) -{ - unregister_net_sysctl_table(rose_table_header); -} diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 96ecb83c9071..27c2aa2dd023 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1486,7 +1486,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); void 
rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fec59d9338b9..2b19b252225e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -332,7 +332,27 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - rxrpc_input_call_packet(call, skb); + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.securityIndex != 0 && + (skb_cloned(skb) || + skb_has_frag_list(skb) || + skb_has_shared_frag(skb))) { + /* Unshare the packet so that it can be + * modified by in-place decryption. + */ + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + + if (nskb) { + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); + rxrpc_input_call_packet(call, nskb); + rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); + } else { + /* OOM - Drop the packet. */ + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); + } + } else { + rxrpc_input_call_packet(call, skb); + } rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); did_receive = true; } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9a41ec708aeb..442414d90ba1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -240,6 +240,34 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) rxrpc_notify_socket(call); } +static int rxrpc_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + int ret; + + if (skb_cloned(skb) || skb_has_frag_list(skb) || + skb_has_shared_frag(skb)) { + /* Copy the packet if shared so that we can do in-place + * decryption. 
+ */ + struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); + + if (nskb) { + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); + ret = conn->security->verify_response(conn, nskb); + rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); + } else { + /* OOM - Drop the packet. */ + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); + ret = -ENOMEM; + } + } else { + ret = conn->security->verify_response(conn, skb); + } + + return ret; +} + /* * connection-level Rx packet processor */ @@ -270,7 +298,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, } spin_unlock_irq(&conn->state_lock); - ret = conn->security->verify_response(conn, skb); + ret = rxrpc_verify_response(conn, skb); if (ret < 0) return ret; @@ -362,7 +390,6 @@ again: static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { struct sk_buff *skb; - int ret; if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -371,17 +398,8 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); - ret = rxrpc_process_event(conn, skb); - switch (ret) { - case -ENOMEM: - case -EAGAIN: - skb_queue_head(&conn->rx_queue, skb); - rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work); - break; - default: - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); - break; - } + rxrpc_process_event(conn, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); } } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 697956931925..dc5184a2fa9d 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -192,13 +192,12 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff *skb) { struct 
rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; - struct sk_buff *skb = *_skb; bool ret = false; skb_pull(skb, sizeof(struct udphdr)); @@ -244,25 +243,6 @@ static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); if (sp->hdr.seq == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); - - /* Unshare the packet so that it can be modified for in-place - * decryption. - */ - if (sp->hdr.securityIndex != 0) { - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); - *_skb = NULL; - return just_discard; - } - - if (skb != *_skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); - *_skb = skb; - rxrpc_new_skb(skb, rxrpc_skb_new_unshared); - sp = rxrpc_skb(skb); - } - } break; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -494,7 +474,7 @@ int rxrpc_io_thread(void *data) switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - if (!rxrpc_input_packet(local, &skb)) + if (!rxrpc_input_packet(local, skb)) rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 6301d79ee35a..3ec3d89fdf14 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -502,6 +502,10 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) if (v1->security_index != RXRPC_SECURITY_RXKAD) goto error; + ret = -EKEYREJECTED; + if (v1->ticket_length > AFSTOKEN_RK_TIX_MAX) + goto error; + plen = sizeof(*token->kad) + v1->ticket_length; prep->quotalen += plen + sizeof(*token); diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c index 30275cb5ba3e..0ef2a29eb695 100644 --- a/net/rxrpc/rxgk_app.c +++ b/net/rxrpc/rxgk_app.c @@ -214,7 +214,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, ticket_len = ntohl(container.token_len); ticket_offset = 
token_offset + sizeof(container); - if (xdr_round_up(ticket_len) > token_len - sizeof(container)) + if (ticket_len > xdr_round_down(token_len - sizeof(container))) goto short_packet; _debug("KVNO %u", kvno); @@ -245,6 +245,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, if (ret != -ENOMEM) return rxrpc_abort_conn(conn, skb, ec, ret, rxgk_abort_resp_tok_dec); + return ret; } ret = conn->security->default_decode_ticket(conn, skb, ticket_offset, diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h index 80164d89e19c..1e257d7ab8ec 100644 --- a/net/rxrpc/rxgk_common.h +++ b/net/rxrpc/rxgk_common.h @@ -34,6 +34,7 @@ struct rxgk_context { }; #define xdr_round_up(x) (round_up((x), sizeof(__be32))) +#define xdr_round_down(x) (round_down((x), sizeof(__be32))) #define xdr_object_len(x) (4 + xdr_round_up(x)) /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index eb7f2769d2b1..cba7935977f0 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -510,6 +510,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_header); + /* Don't let the crypto algo see a misaligned length. */ + sp->len = round_down(sp->len, 8); + /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. 
*/ @@ -543,8 +546,10 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, if (sg != _sg) kfree(sg); if (ret < 0) { - WARN_ON_ONCE(ret != -ENOMEM); - return ret; + if (ret == -ENOMEM) + return ret; + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_crypto_unaligned); } /* Extract the decrypted packet length */ @@ -1136,7 +1141,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_crypt session_key; struct key *server_key; time64_t expiry; - void *ticket; + void *ticket = NULL; u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1162,13 +1167,13 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = -ENOMEM; response = kzalloc_obj(struct rxkad_response, GFP_NOFS); if (!response) - goto temporary_error; + goto error; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), response, sizeof(*response)) < 0) { - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, - rxkad_abort_resp_short); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); + goto error; } version = ntohl(response->version); @@ -1178,62 +1183,62 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); if (version != RXKAD_VERSION) { - rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, - rxkad_abort_resp_version); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); + goto error; } if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) { - rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, - rxkad_abort_resp_tkt_len); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, + rxkad_abort_resp_tkt_len); + goto error; } if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { - rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, - rxkad_abort_resp_unknown_tkt); - goto 
protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, + rxkad_abort_resp_unknown_tkt); + goto error; } /* extract the kerberos ticket and decrypt and decode it */ ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - goto temporary_error_free_resp; + goto error; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), ticket, ticket_len) < 0) { - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, - rxkad_abort_resp_short_tkt); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto error; } ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, &session_key, &expiry); if (ret < 0) - goto temporary_error_free_ticket; + goto error; /* use the session key from inside the ticket to decrypt the * response */ ret = rxkad_decrypt_response(conn, response, &session_key); if (ret < 0) - goto temporary_error_free_ticket; + goto error; if (ntohl(response->encrypted.epoch) != conn->proto.epoch || ntohl(response->encrypted.cid) != conn->proto.cid || ntohl(response->encrypted.securityIndex) != conn->security_ix) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_param); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_param); + goto error; } csum = response->encrypted.checksum; response->encrypted.checksum = 0; rxkad_calc_response_checksum(response); if (response->encrypted.checksum != csum) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_checksum); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_checksum); + goto error; } for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -1241,38 +1246,38 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, u32 counter = READ_ONCE(conn->channels[i].call_counter); if (call_id > INT_MAX) { - 
rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_callid); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_callid); + goto error; } if (call_id < counter) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_call_ctr); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_ctr); + goto error; } if (call_id > counter) { if (conn->channels[i].call) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_call_state); - goto protocol_error_free; + goto error; } conn->channels[i].call_counter = call_id; } } if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { - rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, - rxkad_abort_resp_ooseq); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, + rxkad_abort_resp_ooseq); + goto error; } level = ntohl(response->encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) { - rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, - rxkad_abort_resp_level); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, + rxkad_abort_resp_level); + goto error; } conn->security_level = level; @@ -1280,31 +1285,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * this the connection security can be handled in exactly the same way * as for a client connection */ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); - if (ret < 0) - goto temporary_error_free_ticket; - - kfree(ticket); - kfree(response); - _leave(" = 0"); - return 0; - -protocol_error_free: - kfree(ticket); -protocol_error: - kfree(response); - key_put(server_key); - return -EPROTO; -temporary_error_free_ticket: +error: kfree(ticket); -temporary_error_free_resp: kfree(response); -temporary_error: - /* 
Ignore the response packet if we got a temporary error such as - * ENOMEM. We just want to send the challenge again. Note that we - * also come out this way if the ticket decryption fails. - */ key_put(server_key); + _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 3bcd6ee80396..e2169d1a14b5 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -47,15 +47,6 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) } /* - * Note the dropping of a ref on a socket buffer by the core. - */ -void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) -{ - int n = atomic_inc_return(&rxrpc_n_rx_skbs); - trace_rxrpc_skb(skb, 0, n, why); -} - -/* * Note the destruction of a socket buffer. */ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 05e0b14b5773..2c5a7a321a94 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -354,7 +354,7 @@ static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, goto assign_prev; tcf_mirred_to_dev(skb, m, dev_prev, - dev_is_mac_header_xmit(dev), + dev_is_mac_header_xmit(dev_prev), mirred_eaction, retval); assign_prev: dev_prev = dev; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ffea9fbd522d..5862933be8d7 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ -static void cobalt_newton_step(struct cobalt_vars *vars) +static void cobalt_newton_step(struct cobalt_vars *vars, u32 count) { u32 invsqrt, invsqrt2; u64 val; invsqrt = vars->rec_inv_sqrt; invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - val = (3LL << 32) - ((u64)vars->count * invsqrt2); + val = (3LL << 32) - ((u64)count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) 
>> (32 - 2 + 1); @@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars) vars->rec_inv_sqrt = val; } -static void cobalt_invsqrt(struct cobalt_vars *vars) +static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count) { - if (vars->count < REC_INV_SQRT_CACHE) - vars->rec_inv_sqrt = inv_sqrt_cache[vars->count]; + if (count < REC_INV_SQRT_CACHE) + vars->rec_inv_sqrt = inv_sqrt_cache[count]; else - cobalt_newton_step(vars); + cobalt_newton_step(vars, count); } static void cobalt_vars_init(struct cobalt_vars *vars) @@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars, bool up = false; if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { - up = !vars->p_drop; - vars->p_drop += p->p_inc; - if (vars->p_drop < p->p_inc) - vars->p_drop = ~0; - vars->blue_timer = now; - } - vars->dropping = true; - vars->drop_next = now; + u32 p_drop = vars->p_drop; + + up = !p_drop; + p_drop += p->p_inc; + if (p_drop < p->p_inc) + p_drop = ~0; + WRITE_ONCE(vars->p_drop, p_drop); + WRITE_ONCE(vars->blue_timer, now); + } + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, now); if (!vars->count) - vars->count = 1; + WRITE_ONCE(vars->count, 1); return up; } @@ -475,20 +478,20 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, if (vars->p_drop && ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { if (vars->p_drop < p->p_dec) - vars->p_drop = 0; + WRITE_ONCE(vars->p_drop, 0); else - vars->p_drop -= p->p_dec; - vars->blue_timer = now; + WRITE_ONCE(vars->p_drop, vars->p_drop - p->p_dec); + WRITE_ONCE(vars->blue_timer, now); down = !vars->p_drop; } - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->count, vars->count - 1); + cobalt_invsqrt(vars, vars->count); + 
WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); } return down; @@ -507,6 +510,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, bool next_due, over_target; ktime_t schedule; u64 sojourn; + u32 count; /* The 'schedule' variable records, in its sign, whether 'now' is before or * after 'drop_next'. This allows 'drop_next' to be updated before the next @@ -528,21 +532,22 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, over_target = sojourn > p->target && sojourn > p->mtu_time * bulk_flows * 2 && sojourn > p->mtu_time * 4; - next_due = vars->count && ktime_to_ns(schedule) >= 0; + count = vars->count; + next_due = count && ktime_to_ns(schedule) >= 0; vars->ecn_marked = false; if (over_target) { if (!vars->dropping) { - vars->dropping = true; - vars->drop_next = cobalt_control(now, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, + cobalt_control(now, p->interval, + vars->rec_inv_sqrt)); } - if (!vars->count) - vars->count = 1; + if (!count) + count = 1; } else if (vars->dropping) { - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } if (next_due && vars->dropping) { @@ -550,23 +555,23 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) reason = QDISC_DROP_CONGESTED; - vars->count++; - if (!vars->count) - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count++; + if (!count) + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); } else { while (next_due) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count--; + 
cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); - next_due = vars->count && ktime_to_ns(schedule) >= 0; + next_due = count && ktime_to_ns(schedule) >= 0; } } @@ -575,11 +580,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, get_random_u32() < vars->p_drop) reason = QDISC_DROP_FLOOD_PROTECTION; + WRITE_ONCE(vars->count, count); /* Overload the drop_next field as an activity timeout */ - if (!vars->count) - vars->drop_next = ktime_add_ns(now, p->interval); + if (!count) + WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval)); else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) - vars->drop_next = now; + WRITE_ONCE(vars->drop_next, now); return reason; } @@ -619,7 +625,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys, } port = rev ? tuple.src.u.all : tuple.dst.u.all; if (port != keys->ports.dst) { - port = keys->ports.dst; + keys->ports.dst = port; upd = true; } } @@ -813,7 +819,7 @@ skip_hash: i++, k = (k + 1) % CAKE_SET_WAYS) { if (q->tags[outer_hash + k] == flow_hash) { if (i) - q->way_hits++; + WRITE_ONCE(q->way_hits, q->way_hits + 1); if (!q->flows[outer_hash + k].set) { /* need to increment host refcnts */ @@ -831,7 +837,7 @@ skip_hash: for (i = 0; i < CAKE_SET_WAYS; i++, k = (k + 1) % CAKE_SET_WAYS) { if (!q->flows[outer_hash + k].set) { - q->way_misses++; + WRITE_ONCE(q->way_misses, q->way_misses + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); goto found; @@ -841,7 +847,7 @@ skip_hash: /* With no empty queues, default to the original * queue, accept the collision, update the host tags. 
*/ - q->way_collisions++; + WRITE_ONCE(q->way_collisions, q->way_collisions + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); @@ -914,7 +920,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) struct sk_buff *skb = flow->head; if (skb) { - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); } @@ -926,7 +932,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) { if (!flow->head) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -1357,7 +1363,7 @@ found: if (elig_ack_prev) elig_ack_prev->next = elig_ack->next; else - flow->head = elig_ack->next; + WRITE_ONCE(flow->head, elig_ack->next); skb_mark_not_on_list(elig_ack); @@ -1379,9 +1385,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) len -= off; if (qd->max_netlen < len) - qd->max_netlen = len; + WRITE_ONCE(qd->max_netlen, len); if (qd->min_netlen > len) - qd->min_netlen = len; + WRITE_ONCE(qd->min_netlen, len); len += q->rate_overhead; @@ -1401,9 +1407,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) } if (qd->max_adjlen < len) - qd->max_adjlen = len; + WRITE_ONCE(qd->max_adjlen, len); if (qd->min_adjlen > len) - qd->min_adjlen = len; + WRITE_ONCE(qd->min_adjlen, len); return len; } @@ -1416,7 +1422,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) u16 segs = qdisc_pkt_segs(skb); u32 len = qdisc_pkt_len(skb); - q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); + WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8)); if (segs == 1) return cake_calc_overhead(q, len, off); @@ -1590,16 +1596,17 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) } if (cobalt_queue_full(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count++; + 
WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count + 1); len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; - b->backlogs[idx] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len); sch->qstats.backlog -= len; - flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); @@ -1795,7 +1802,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(len > b->max_skblen)) - b->max_skblen = len; + WRITE_ONCE(b->max_skblen, len); if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; @@ -1819,15 +1826,15 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, numsegs++; slen += segs->len; q->buffer_used += segs->truesize; - b->packets++; + WRITE_ONCE(b->packets, b->packets + 1); } /* stats */ - b->bytes += slen; - b->backlogs[idx] += slen; - b->tin_backlog += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; + WRITE_ONCE(b->bytes, b->bytes + slen); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1843,10 +1850,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, ack = cake_ack_filter(q, flow); if (ack) { - b->ack_drops++; + WRITE_ONCE(b->ack_drops, b->ack_drops + 1); sch->qstats.drops++; ack_pkt_len = qdisc_pkt_len(ack); - b->bytes += ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len); q->buffer_used += skb->truesize - ack->truesize; if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, ack, now, true); @@ -1859,12 +1866,12 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->packets++; - 
b->bytes += len - ack_pkt_len; - b->backlogs[idx] += len - ack_pkt_len; - b->tin_backlog += len - ack_pkt_len; + WRITE_ONCE(b->packets, b->packets + 1); sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1894,9 +1901,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; b = div64_u64(b, window_interval); - q->avg_peak_bandwidth = - cake_ewma(q->avg_peak_bandwidth, b, - b > q->avg_peak_bandwidth ? 2 : 8); + WRITE_ONCE(q->avg_peak_bandwidth, + cake_ewma(q->avg_peak_bandwidth, b, + b > q->avg_peak_bandwidth ? 2 : 8)); q->avg_window_bytes = 0; q->avg_window_begin = now; @@ -1917,27 +1924,27 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (!flow->set) { list_add_tail(&flow->flowchain, &b->new_flows); } else { - b->decaying_flow_count--; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); list_move_tail(&flow->flowchain, &b->new_flows); } flow->set = CAKE_SET_SPARSE; - b->sparse_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); - flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, cake_get_flow_quantum(b, flow, q->config->flow_mode)); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. 
*/ flow->set = CAKE_SET_BULK; - b->sparse_flow_count--; - b->bulk_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); } if (q->buffer_used > q->buffer_max_used) - q->buffer_max_used = q->buffer_used; + WRITE_ONCE(q->buffer_max_used, q->buffer_used); if (q->buffer_used <= q->buffer_limit) return NET_XMIT_SUCCESS; @@ -1976,8 +1983,8 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) if (flow->head) { skb = dequeue_head(flow); len = qdisc_pkt_len(skb); - b->backlogs[q->cur_flow] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len); + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; sch->q.qlen--; @@ -2042,7 +2049,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) cake_configure_rates(sch, new_rate, true); q->last_checked_active = now; - q->active_queues = num_active_qs; + WRITE_ONCE(q->active_queues, num_active_qs); } begin: @@ -2149,8 +2156,8 @@ retry: */ if (flow->set == CAKE_SET_SPARSE) { if (flow->head) { - b->sparse_flow_count--; - b->bulk_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2165,7 +2172,8 @@ retry: } } - flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, + flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode)); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2177,7 +2185,8 @@ retry: if (!skb) { /* this queue was actually empty */ if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count--; + 
WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count - 1); if (flow->cvars.p_drop || flow->cvars.count || ktime_before(now, flow->cvars.drop_next)) { @@ -2187,32 +2196,32 @@ retry: list_move_tail(&flow->flowchain, &b->decaying_flows); if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - b->decaying_flow_count++; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } else if (flow->set == CAKE_SET_SPARSE || flow->set == CAKE_SET_SPARSE_WAIT) { - b->sparse_flow_count--; - b->decaying_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } flow->set = CAKE_SET_DECAYING; } else { /* remove empty queue from the flowchain */ list_del_init(&flow->flowchain); if (flow->set == CAKE_SET_SPARSE || - flow->set == CAKE_SET_SPARSE_WAIT) - b->sparse_flow_count--; - else if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + flow->set == CAKE_SET_SPARSE_WAIT) { + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + } else if (flow->set == CAKE_SET_BULK) { + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - } else - b->decaying_flow_count--; - + } else { + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); + } flow->set = CAKE_SET_NONE; } goto begin; @@ -2230,11 +2239,11 @@ retry: if (q->config->rate_flags & CAKE_FLAG_INGRESS) { len = cake_advance_shaper(q, b, skb, now, true); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; } - flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); 
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); qdisc_dequeue_drop(sch, skb, reason); @@ -2242,20 +2251,22 @@ retry: goto retry; } - b->tin_ecn_mark += !!flow->cvars.ecn_marked; + WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked); qdisc_bstats_update(sch, skb); WRITE_ONCE(q->last_active, now); /* collect delay stats */ delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); - b->avge_delay = cake_ewma(b->avge_delay, delay, 8); - b->peak_delay = cake_ewma(b->peak_delay, delay, - delay > b->peak_delay ? 2 : 8); - b->base_delay = cake_ewma(b->base_delay, delay, - delay < b->base_delay ? 2 : 8); + WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8)); + WRITE_ONCE(b->peak_delay, + cake_ewma(b->peak_delay, delay, + delay > b->peak_delay ? 2 : 8)); + WRITE_ONCE(b->base_delay, + cake_ewma(b->base_delay, delay, + delay < b->base_delay ? 2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { @@ -2329,9 +2340,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, u8 rate_shft = 0; u64 rate_ns = 0; - b->flow_quantum = 1514; if (rate) { - b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); + WRITE_ONCE(b->flow_quantum, + max(min(rate >> 12, 1514ULL), 300ULL)); rate_shft = 34; rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); @@ -2339,9 +2350,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, rate_ns >>= 1; rate_shft--; } - } /* else unlimited, ie. zero delay */ - - b->tin_rate_bps = rate; + } else { + /* else unlimited, ie. 
zero delay */ + WRITE_ONCE(b->flow_quantum, 1514); + } + WRITE_ONCE(b->tin_rate_bps, rate); b->tin_rate_ns = rate_ns; b->tin_rate_shft = rate_shft; @@ -2350,10 +2363,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, byte_target_ns = (byte_target * rate_ns) >> rate_shft; - b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); - b->cparams.interval = max(rtt_est_ns + - b->cparams.target - target_ns, - b->cparams.target * 2); + WRITE_ONCE(b->cparams.target, + max((byte_target_ns * 3) / 2, target_ns)); + WRITE_ONCE(b->cparams.interval, + max(rtt_est_ns + b->cparams.target - target_ns, + b->cparams.target * 2)); b->cparams.mtu_time = byte_target_ns; b->cparams.p_inc = 1 << 24; /* 1/256 */ b->cparams.p_dec = 1 << 20; /* 1/4096 */ @@ -2611,25 +2625,27 @@ static void cake_reconfigure(struct Qdisc *sch) { struct cake_sched_data *qd = qdisc_priv(sch); struct cake_sched_config *q = qd->config; + u32 buffer_limit; cake_configure_rates(sch, qd->config->rate_bps, false); if (q->buffer_config_limit) { - qd->buffer_limit = q->buffer_config_limit; + buffer_limit = q->buffer_config_limit; } else if (q->rate_bps) { u64 t = q->rate_bps * q->interval; do_div(t, USEC_PER_SEC / 4); - qd->buffer_limit = max_t(u32, t, 4U << 20); + buffer_limit = max_t(u32, t, 4U << 20); } else { - qd->buffer_limit = ~0; + buffer_limit = ~0; } sch->flags &= ~TCQ_F_CAN_BYPASS; - qd->buffer_limit = min(qd->buffer_limit, - max(sch->limit * psched_mtu(qdisc_dev(sch)), - q->buffer_config_limit)); + WRITE_ONCE(qd->buffer_limit, + min(buffer_limit, + max(sch->limit * psched_mtu(qdisc_dev(sch)), + q->buffer_config_limit))); } static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt, @@ -2774,10 +2790,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, return ret; if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + 
WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { @@ -2995,15 +3011,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) goto nla_put_failure; \ } while (0) - PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); - PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); - PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); - PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); - PUT_STAT_U32(MAX_NETLEN, q->max_netlen); - PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); - PUT_STAT_U32(MIN_NETLEN, q->min_netlen); - PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); - PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues); + PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth)); + PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit)); + PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used)); + PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16)); + PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen)); + PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen)); + PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen)); + PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen)); + PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues)); #undef PUT_STAT_U32 #undef PUT_STAT_U64 @@ -3029,38 +3045,38 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) if (!ts) goto nla_put_failure; - PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); - PUT_TSTAT_U64(SENT_BYTES64, b->bytes); - PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); + PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps)); + PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes)); + PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog)); PUT_TSTAT_U32(TARGET_US, - ktime_to_us(ns_to_ktime(b->cparams.target))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target)))); PUT_TSTAT_U32(INTERVAL_US, - ktime_to_us(ns_to_ktime(b->cparams.interval))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval)))); - PUT_TSTAT_U32(SENT_PACKETS, b->packets); - 
PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); - PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); - PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); + PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets)); + PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped)); + PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark)); + PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops)); PUT_TSTAT_U32(PEAK_DELAY_US, - ktime_to_us(ns_to_ktime(b->peak_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay)))); PUT_TSTAT_U32(AVG_DELAY_US, - ktime_to_us(ns_to_ktime(b->avge_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay)))); PUT_TSTAT_U32(BASE_DELAY_US, - ktime_to_us(ns_to_ktime(b->base_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay)))); - PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); - PUT_TSTAT_U32(WAY_MISSES, b->way_misses); - PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); + PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits)); + PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses)); + PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions)); - PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + - b->decaying_flow_count); - PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); - PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); - PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); + PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) + + READ_ONCE(b->decaying_flow_count)); + PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count)); + PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count)); + PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen)); - PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); + PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum)); nla_nest_end(d->skb, ts); } @@ -3128,7 +3144,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, flow = &b->flows[idx % CAKE_QUEUES]; - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while 
(skb) { @@ -3137,13 +3153,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = b->backlogs[idx % CAKE_QUEUES]; - qs.drops = flow->dropped; + qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]); + qs.drops = READ_ONCE(flow->dropped); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (flow) { ktime_t now = ktime_get(); + bool dropping; + u32 p_drop; stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) @@ -3158,21 +3176,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; \ } while (0) - PUT_STAT_S32(DEFICIT, flow->deficit); - PUT_STAT_U32(DROPPING, flow->cvars.dropping); - PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); - PUT_STAT_U32(P_DROP, flow->cvars.p_drop); - if (flow->cvars.p_drop) { + PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit)); + dropping = READ_ONCE(flow->cvars.dropping); + PUT_STAT_U32(DROPPING, dropping); + PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count)); + p_drop = READ_ONCE(flow->cvars.p_drop); + PUT_STAT_U32(P_DROP, p_drop); + if (p_drop) { PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + READ_ONCE(flow->cvars.blue_timer)))); } - if (flow->cvars.dropping) { + if (dropping) { PUT_STAT_S32(DROP_NEXT_US, ktime_to_us( ktime_sub(now, - flow->cvars.drop_next))); + READ_ONCE(flow->cvars.drop_next)))); } if (nla_nest_end(d->skb, stats) < 0) @@ -3298,10 +3318,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt, struct cake_sched_data *qd = qdisc_priv(chld); if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 8c9a0400c862..0f953bd46b58 100644 --- a/net/sched/sch_cbs.c +++ 
b/net/sched/sch_cbs.c @@ -243,6 +243,20 @@ static struct sk_buff *cbs_dequeue(struct Qdisc *sch) return q->dequeue(sch); } +static void cbs_reset(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + /* Nothing to do if we couldn't create the underlying qdisc */ + if (!q->qdisc) + return; + + qdisc_reset(q->qdisc); + qdisc_watchdog_cancel(&q->watchdog); + q->credits = 0; + q->last = 0; +} + static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, }; @@ -540,7 +554,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .dequeue = cbs_dequeue, .peek = qdisc_peek_dequeued, .init = cbs_init, - .reset = qdisc_reset_queue, + .reset = cbs_reset, .destroy = cbs_destroy, .change = cbs_change, .dump = cbs_dump, diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 94df8e741a97..2875bcdb18a4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -229,7 +229,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Draw a packet at random from queue and compare flow */ if (choke_match_random(q, skb, &idx)) { - q->stats.matched++; + WRITE_ONCE(q->stats.matched, q->stats.matched + 1); choke_drop_by_idx(sch, idx, to_free); goto congestion_drop; } @@ -241,11 +241,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (use_harddrop(q) || !use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; + WRITE_ONCE(q->stats.forced_drop, + q->stats.forced_drop + 1); goto congestion_drop; } - q->stats.forced_mark++; + WRITE_ONCE(q->stats.forced_mark, + q->stats.forced_mark + 1); } else if (++q->vars.qcount) { if (red_mark_probability(p, &q->vars, q->vars.qavg)) { q->vars.qcount = 0; @@ -253,11 +255,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; + WRITE_ONCE(q->stats.prob_drop, + 
q->stats.prob_drop + 1); goto congestion_drop; } - q->stats.prob_mark++; + WRITE_ONCE(q->stats.prob_mark, + q->stats.prob_mark + 1); } } else q->vars.qR = red_random(p); @@ -272,7 +276,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } - q->stats.pdrop++; + WRITE_ONCE(q->stats.pdrop, q->stats.pdrop + 1); return qdisc_drop(skb, sch, to_free); congestion_drop: @@ -461,10 +465,12 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct choke_sched_data *q = qdisc_priv(sch); struct tc_choke_xstats st = { - .early = q->stats.prob_drop + q->stats.forced_drop, - .marked = q->stats.prob_mark + q->stats.forced_mark, - .pdrop = q->stats.pdrop, - .matched = q->stats.matched, + .early = READ_ONCE(q->stats.prob_drop) + + READ_ONCE(q->stats.forced_drop), + .marked = READ_ONCE(q->stats.prob_mark) + + READ_ONCE(q->stats.forced_mark), + .pdrop = READ_ONCE(q->stats.pdrop), + .matched = READ_ONCE(q->stats.matched), }; return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index fe6f5e889625..a22489c14458 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -868,11 +868,35 @@ static int dualpi2_change(struct Qdisc *sch, struct nlattr *opt, old_backlog = sch->qstats.backlog; while (qdisc_qlen(sch) > sch->limit || q->memory_used > q->memory_limit) { - struct sk_buff *skb = qdisc_dequeue_internal(sch, true); + struct sk_buff *skb = NULL; - q->memory_used -= skb->truesize; - qdisc_qstats_backlog_dec(sch, skb); - rtnl_qdisc_drop(skb, sch); + if (qdisc_qlen(sch) > qdisc_qlen(q->l_queue)) { + skb = qdisc_dequeue_internal(sch, true); + if (unlikely(!skb)) { + WARN_ON_ONCE(1); + break; + } + q->memory_used -= skb->truesize; + rtnl_qdisc_drop(skb, sch); + } else if (qdisc_qlen(q->l_queue)) { + skb = qdisc_dequeue_internal(q->l_queue, true); + if (unlikely(!skb)) { + WARN_ON_ONCE(1); + break; + } + /* L-queue packets are counted in both sch and + * l_queue 
on enqueue; qdisc_dequeue_internal() + * handled l_queue, so we further account for sch. + */ + --sch->q.qlen; + qdisc_qstats_backlog_dec(sch, skb); + q->memory_used -= skb->truesize; + rtnl_qdisc_drop(skb, q->l_queue); + qdisc_qstats_drop(sch); + } else { + WARN_ON_ONCE(1); + break; + } } qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch), old_backlog - sch->qstats.backlog); @@ -914,6 +938,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, int err; sch->flags |= TCQ_F_DEQUEUE_DROPS; + hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1), extack); @@ -926,8 +952,6 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, q->sch = sch; dualpi2_reset_default(sch); - hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED_SOFT); if (opt && nla_len(opt)) { err = dualpi2_change(sch, opt, extack); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 2a3d758f67ab..24db54684e8a 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -117,7 +117,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) { struct sk_buff *skb = flow->head; - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); return skb; } @@ -127,7 +127,7 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, struct sk_buff *skb) { if (flow->head == NULL) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -173,8 +173,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, } while (++i < max_packets && len < threshold); /* Tell codel to increase its signal strength also */ - flow->cvars.count += i; - q->backlogs[idx] -= len; + WRITE_ONCE(flow->cvars.count, flow->cvars.count + i); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] - len); 
q->memory_usage -= mem; sch->qstats.drops += i; sch->qstats.backlog -= len; @@ -204,13 +204,13 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, codel_set_enqueue_time(skb); flow = &q->flows[idx]; flow_queue_add(flow, skb); - q->backlogs[idx] += qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] + qdisc_pkt_len(skb)); qdisc_qstats_backlog_inc(sch, skb); if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; - flow->deficit = q->quantum; + WRITE_ONCE(flow->deficit, q->quantum); } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -263,7 +263,8 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) flow = container_of(vars, struct fq_codel_flow, cvars); if (flow->head) { skb = dequeue_head(flow); - q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[flow - q->flows], + q->backlogs[flow - q->flows] - qdisc_pkt_len(skb)); q->memory_usage -= get_codel_cb(skb)->mem_usage; sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -296,7 +297,7 @@ begin: flow = list_first_entry(head, struct fq_codel_flow, flowchain); if (flow->deficit <= 0) { - flow->deficit += q->quantum; + WRITE_ONCE(flow->deficit, flow->deficit + q->quantum); list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } @@ -314,7 +315,7 @@ begin: goto begin; } qdisc_bstats_update(sch, skb); - flow->deficit -= qdisc_pkt_len(skb); + WRITE_ONCE(flow->deficit, flow->deficit - qdisc_pkt_len(skb)); if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, @@ -328,7 +329,7 @@ begin: static void fq_codel_flow_purge(struct fq_codel_flow *flow) { rtnl_kfree_skbs(flow->head, flow->tail); - flow->head = NULL; + WRITE_ONCE(flow->head, NULL); } static void fq_codel_reset(struct Qdisc *sch) @@ -585,6 +586,8 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) }; struct list_head *pos; + 
sch_tree_lock(sch); + st.qdisc_stats.maxpacket = q->cstats.maxpacket; st.qdisc_stats.drop_overlimit = q->drop_overlimit; st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; @@ -593,7 +596,6 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.memory_usage = q->memory_usage; st.qdisc_stats.drop_overmemory = q->drop_overmemory; - sch_tree_lock(sch); list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; @@ -655,21 +657,21 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; - xstats.class_stats.deficit = flow->deficit; + xstats.class_stats.deficit = READ_ONCE(flow->deficit); xstats.class_stats.ldelay = - codel_time_to_us(flow->cvars.ldelay); - xstats.class_stats.count = flow->cvars.count; - xstats.class_stats.lastcount = flow->cvars.lastcount; - xstats.class_stats.dropping = flow->cvars.dropping; - if (flow->cvars.dropping) { - codel_tdiff_t delta = flow->cvars.drop_next - + codel_time_to_us(READ_ONCE(flow->cvars.ldelay)); + xstats.class_stats.count = READ_ONCE(flow->cvars.count); + xstats.class_stats.lastcount = READ_ONCE(flow->cvars.lastcount); + xstats.class_stats.dropping = READ_ONCE(flow->cvars.dropping); + if (xstats.class_stats.dropping) { + codel_tdiff_t delta = READ_ONCE(flow->cvars.drop_next) - codel_get_time(); xstats.class_stats.drop_next = (delta >= 0) ? 
codel_time_to_us(delta) : -codel_time_to_us(-delta); } - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -678,7 +680,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = q->backlogs[idx]; + qs.backlog = READ_ONCE(q->backlogs[idx]); qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 154c70f489f2..7becbf5362b3 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -509,18 +509,19 @@ nla_put_failure: static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_pie_sched_data *q = qdisc_priv(sch); - struct tc_fq_pie_xstats st = { - .packets_in = q->stats.packets_in, - .overlimit = q->stats.overlimit, - .overmemory = q->overmemory, - .dropped = q->stats.dropped, - .ecn_mark = q->stats.ecn_mark, - .new_flow_count = q->new_flow_count, - .memory_usage = q->memory_usage, - }; + struct tc_fq_pie_xstats st = { 0 }; struct list_head *pos; sch_tree_lock(sch); + + st.packets_in = q->stats.packets_in; + st.overlimit = q->stats.overlimit; + st.overmemory = q->overmemory; + st.dropped = q->stats.dropped; + st.ecn_mark = q->stats.ecn_mark; + st.new_flow_count = q->new_flow_count; + st.memory_usage = q->memory_usage; + list_for_each(pos, &q->new_flows) st.new_flows_len++; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 95e5d9bfd9c8..96021f52d835 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -198,7 +198,8 @@ static struct hh_flow_state *seek_list(const u32 hash, return NULL; list_del(&flow->flowchain); kfree(flow); - q->hh_flows_current_cnt--; + WRITE_ONCE(q->hh_flows_current_cnt, + q->hh_flows_current_cnt - 1); } else if (flow->hash_id == hash) { return flow; } @@ -226,7 +227,7 @@ static struct hh_flow_state *alloc_new_hh(struct list_head *head, } if (q->hh_flows_current_cnt >= q->hh_flows_limit) { - 
q->hh_flows_overlimit++; + WRITE_ONCE(q->hh_flows_overlimit, q->hh_flows_overlimit + 1); return NULL; } /* Create new entry. */ @@ -234,7 +235,7 @@ static struct hh_flow_state *alloc_new_hh(struct list_head *head, if (!flow) return NULL; - q->hh_flows_current_cnt++; + WRITE_ONCE(q->hh_flows_current_cnt, q->hh_flows_current_cnt + 1); INIT_LIST_HEAD(&flow->flowchain); list_add_tail(&flow->flowchain, head); @@ -309,7 +310,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) return WDRR_BUCKET_FOR_NON_HH; flow->hash_id = hash; flow->hit_timestamp = now; - q->hh_flows_total_cnt++; + WRITE_ONCE(q->hh_flows_total_cnt, q->hh_flows_total_cnt + 1); /* By returning without updating counters in q->hhf_arrays, * we implicitly implement "shielding" (see Optimization O1). @@ -403,7 +404,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; prev_backlog = sch->qstats.backlog; - q->drop_overlimit++; + WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1); /* Return Congestion Notification only if we dropped a packet from this * bucket. 
*/ @@ -686,10 +687,10 @@ static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct hhf_sched_data *q = qdisc_priv(sch); struct tc_hhf_xstats st = { - .drop_overlimit = q->drop_overlimit, - .hh_overlimit = q->hh_flows_overlimit, - .hh_tot_count = q->hh_flows_total_cnt, - .hh_cur_count = q->hh_flows_current_cnt, + .drop_overlimit = READ_ONCE(q->drop_overlimit), + .hh_overlimit = READ_ONCE(q->hh_flows_overlimit), + .hh_tot_count = READ_ONCE(q->hh_flows_total_cnt), + .hh_cur_count = READ_ONCE(q->hh_flows_current_cnt), }; return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 20df1c08b1e9..bc18e1976b6e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = LOST_IN_GAP_PERIOD; return true; - } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { + } else if (rnd < clg->a1 + clg->a4) { clg->state = LOST_IN_BURST_PERIOD; return true; - } else if (clg->a1 + clg->a4 < rnd) { + } else { clg->state = TX_IN_GAP_PERIOD; } @@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q) case LOST_IN_BURST_PERIOD: if (rnd < clg->a3) clg->state = TX_IN_BURST_PERIOD; - else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + else if (rnd < clg->a2 + clg->a3) { clg->state = TX_IN_GAP_PERIOD; - } else if (clg->a2 + clg->a3 < rnd) { + } else { clg->state = LOST_IN_BURST_PERIOD; return true; } @@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1 << get_random_u32_below(8); } - if (unlikely(q->t_len >= sch->limit)) { + if (unlikely(sch->q.qlen >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); @@ -659,9 +659,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (get_random_u32() * - (q->slot_config.max_delay - 
- q->slot_config.min_delay) >> 32); + mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay, + get_random_u32(), 32); else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), @@ -827,6 +826,39 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_time(const struct nlattr *attr, const char *name, + struct netlink_ext_ack *extack) +{ + if (nla_get_s64(attr) < 0) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); + return -EINVAL; + } + return 0; +} + +static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct tc_netem_slot *c = nla_data(attr); + + if (c->min_delay < 0 || c->max_delay < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); + return -EINVAL; + } + if (c->min_delay > c->max_delay) { + NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); + return -EINVAL; + } + if (c->dist_delay < 0 || c->dist_jitter < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); + return -EINVAL; + } + if (c->max_packets < 0 || c->max_bytes < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); + return -EINVAL; + } + return 0; +} + static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_slot *c = nla_data(attr); @@ -1040,6 +1072,24 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_SLOT]) { + ret = validate_slot(tb[TCA_NETEM_SLOT], extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_LATENCY64]) { + ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_JITTER64]) { + ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; @@ -1112,11 +1162,10 @@ static int 
netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - if (tb[TCA_NETEM_PRNG_SEED]) + if (tb[TCA_NETEM_PRNG_SEED]) { q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); - else - q->prng.seed = get_random_u64(); - prandom_seed_state(&q->prng.prng_state, q->prng.seed); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + } unlock: sch_tree_unlock(sch); @@ -1139,6 +1188,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; q->loss_model = CLG_RANDOM; + q->prng.seed = get_random_u64(); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 16f3f629cb8e..b41f2def2e2c 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -90,7 +90,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, bool enqueue = false; if (unlikely(qdisc_qlen(sch) >= sch->limit)) { - q->stats.overlimit++; + WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1); goto out; } @@ -104,7 +104,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If packet is ecn capable, mark it if drop probability * is lower than 10%, else drop it. 
*/ - q->stats.ecn_mark++; + WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1); enqueue = true; } @@ -114,15 +114,15 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (!q->params.dq_rate_estimator) pie_set_enqueue_time(skb); - q->stats.packets_in++; + WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1); if (qdisc_qlen(sch) > q->stats.maxq) - q->stats.maxq = qdisc_qlen(sch); + WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch)); return qdisc_enqueue_tail(skb, sch); } out: - q->stats.dropped++; + WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1); q->vars.accu_prob = 0; return qdisc_drop_reason(skb, sch, to_free, reason); } @@ -219,16 +219,14 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * packet timestamp. */ if (!params->dq_rate_estimator) { - vars->qdelay = now - pie_get_enqueue_time(skb); + WRITE_ONCE(vars->qdelay, + backlog ? now - pie_get_enqueue_time(skb) : 0); if (vars->dq_tstamp != DTIME_INVALID) dtime = now - vars->dq_tstamp; vars->dq_tstamp = now; - if (backlog == 0) - vars->qdelay = 0; - if (dtime == 0) return; @@ -267,11 +265,11 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, count = count / dtime; if (vars->avg_dq_rate == 0) - vars->avg_dq_rate = count; + WRITE_ONCE(vars->avg_dq_rate, count); else - vars->avg_dq_rate = + WRITE_ONCE(vars->avg_dq_rate, (vars->avg_dq_rate - - (vars->avg_dq_rate >> 3)) + (count >> 3); + (vars->avg_dq_rate >> 3)) + (count >> 3)); /* If the queue has receded below the threshold, we hold * on to the last drain rate calculated, else we reset @@ -376,12 +374,12 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) delta += MAX_PROB / (100 / 2); - vars->prob += delta; + WRITE_ONCE(vars->prob, vars->prob + delta); if (delta > 0) { /* prevent overflow */ if (vars->prob < oldprob) { - vars->prob = MAX_PROB; + WRITE_ONCE(vars->prob, MAX_PROB); /* Prevent normalization 
error. If probability is at * maximum value already, we normalize it here, and * skip the check to do a non-linear drop in the next @@ -392,7 +390,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, } else { /* prevent underflow */ if (vars->prob > oldprob) - vars->prob = 0; + WRITE_ONCE(vars->prob, 0); } /* Non-linear drop in probability: Reduce drop probability quickly if @@ -401,9 +399,9 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay == 0 && qdelay_old == 0 && update_prob) /* Reduce drop probability to 98.4% */ - vars->prob -= vars->prob / 64; + WRITE_ONCE(vars->prob, vars->prob - vars->prob / 64); - vars->qdelay = qdelay; + WRITE_ONCE(vars->qdelay, qdelay); vars->backlog_old = backlog; /* We restart the measurement cycle if the following conditions are met @@ -501,22 +499,22 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { - .prob = q->vars.prob << BITS_PER_BYTE, - .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / + .prob = READ_ONCE(q->vars.prob) << BITS_PER_BYTE, + .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / NSEC_PER_USEC, - .packets_in = q->stats.packets_in, - .overlimit = q->stats.overlimit, - .maxq = q->stats.maxq, - .dropped = q->stats.dropped, - .ecn_mark = q->stats.ecn_mark, + .packets_in = READ_ONCE(q->stats.packets_in), + .overlimit = READ_ONCE(q->stats.overlimit), + .maxq = READ_ONCE(q->stats.maxq), + .dropped = READ_ONCE(q->stats.dropped), + .ecn_mark = READ_ONCE(q->stats.ecn_mark), }; /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ - st.dq_rate_estimating = q->params.dq_rate_estimator; + st.dq_rate_estimating = READ_ONCE(q->params.dq_rate_estimator); /* unscale and return dq_rate in bytes per sec */ - if (q->params.dq_rate_estimator) - st.avg_dq_rate = q->vars.avg_dq_rate * + if (st.dq_rate_estimating) + st.avg_dq_rate = 
READ_ONCE(q->vars.avg_dq_rate) * (PSCHED_TICKS_PER_SEC) >> PIE_SCALE; return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index c8d3d09f15e3..4d0e44a2e7c6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -90,17 +90,20 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_PROB_MARK: qdisc_qstats_overlimit(sch); if (!red_use_ecn(q)) { - q->stats.prob_drop++; + WRITE_ONCE(q->stats.prob_drop, + q->stats.prob_drop + 1); goto congestion_drop; } if (INET_ECN_set_ce(skb)) { - q->stats.prob_mark++; + WRITE_ONCE(q->stats.prob_mark, + q->stats.prob_mark + 1); skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); if (!skb) return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { - q->stats.prob_drop++; + WRITE_ONCE(q->stats.prob_drop, + q->stats.prob_drop + 1); goto congestion_drop; } @@ -111,17 +114,20 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, reason = QDISC_DROP_OVERLIMIT; qdisc_qstats_overlimit(sch); if (red_use_harddrop(q) || !red_use_ecn(q)) { - q->stats.forced_drop++; + WRITE_ONCE(q->stats.forced_drop, + q->stats.forced_drop + 1); goto congestion_drop; } if (INET_ECN_set_ce(skb)) { - q->stats.forced_mark++; + WRITE_ONCE(q->stats.forced_mark, + q->stats.forced_mark + 1); skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); if (!skb) return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { - q->stats.forced_drop++; + WRITE_ONCE(q->stats.forced_drop, + q->stats.forced_drop + 1); goto congestion_drop; } @@ -135,7 +141,8 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, sch->qstats.backlog += len; sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { - q->stats.pdrop++; + WRITE_ONCE(q->stats.pdrop, + q->stats.pdrop + 1); qdisc_qstats_drop(sch); } return ret; @@ -155,7 +162,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; - skb = 
child->dequeue(child); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); @@ -463,9 +470,13 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &hw_stats_request); } - st.early = q->stats.prob_drop + q->stats.forced_drop; - st.pdrop = q->stats.pdrop; - st.marked = q->stats.prob_mark + q->stats.forced_mark; + st.early = READ_ONCE(q->stats.prob_drop) + + READ_ONCE(q->stats.forced_drop); + + st.pdrop = READ_ONCE(q->stats.pdrop); + + st.marked = READ_ONCE(q->stats.prob_mark) + + READ_ONCE(q->stats.forced_mark); return gnet_stats_copy_app(d, &st, sizeof(st)); } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 013738662128..d3ee8e5479b3 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -130,7 +130,7 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) sfbhash >>= SFB_BUCKET_SHIFT; if (b[hash].qlen < 0xFFFF) - b[hash].qlen++; + WRITE_ONCE(b[hash].qlen, b[hash].qlen + 1); b += SFB_NUMBUCKETS; /* next level */ } } @@ -159,7 +159,7 @@ static void decrement_one_qlen(u32 sfbhash, u32 slot, sfbhash >>= SFB_BUCKET_SHIFT; if (b[hash].qlen > 0) - b[hash].qlen--; + WRITE_ONCE(b[hash].qlen, b[hash].qlen - 1); b += SFB_NUMBUCKETS; /* next level */ } } @@ -179,12 +179,12 @@ static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) { - b->p_mark = prob_minus(b->p_mark, q->decrement); + WRITE_ONCE(b->p_mark, prob_minus(b->p_mark, q->decrement)); } static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) { - b->p_mark = prob_plus(b->p_mark, q->increment); + WRITE_ONCE(b->p_mark, prob_plus(b->p_mark, q->increment)); } static void sfb_zero_all_buckets(struct sfb_sched_data *q) @@ -202,11 +202,14 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da const struct 
sfb_bucket *b = &q->bins[q->slot].bins[0][0]; for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { - if (qlen < b->qlen) - qlen = b->qlen; - totalpm += b->p_mark; - if (prob < b->p_mark) - prob = b->p_mark; + u32 b_qlen = READ_ONCE(b->qlen); + u32 b_mark = READ_ONCE(b->p_mark); + + if (qlen < b_qlen) + qlen = b_qlen; + totalpm += b_mark; + if (prob < b_mark) + prob = b_mark; b++; } *prob_r = prob; @@ -295,7 +298,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(sch->q.qlen >= q->limit)) { qdisc_qstats_overlimit(sch); - q->stats.queuedrop++; + WRITE_ONCE(q->stats.queuedrop, + q->stats.queuedrop + 1); goto drop; } @@ -348,7 +352,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(minqlen >= q->max)) { qdisc_qstats_overlimit(sch); - q->stats.bucketdrop++; + WRITE_ONCE(q->stats.bucketdrop, + q->stats.bucketdrop + 1); goto drop; } @@ -374,7 +379,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (sfb_rate_limit(skb, q)) { qdisc_qstats_overlimit(sch); - q->stats.penaltydrop++; + WRITE_ONCE(q->stats.penaltydrop, + q->stats.penaltydrop + 1); goto drop; } goto enqueue; @@ -390,14 +396,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, * In either case, we want to start dropping packets. 
*/ if (r < (p_min - SFB_MAX_PROB / 2) * 2) { - q->stats.earlydrop++; + WRITE_ONCE(q->stats.earlydrop, + q->stats.earlydrop + 1); goto drop; } } if (INET_ECN_set_ce(skb)) { - q->stats.marked++; + WRITE_ONCE(q->stats.marked, + q->stats.marked + 1); } else { - q->stats.earlydrop++; + WRITE_ONCE(q->stats.earlydrop, + q->stats.earlydrop + 1); goto drop; } } @@ -410,7 +419,8 @@ enqueue: sch->q.qlen++; increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { - q->stats.childdrop++; + WRITE_ONCE(q->stats.childdrop, + q->stats.childdrop + 1); qdisc_qstats_drop(sch); } return ret; @@ -431,7 +441,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; struct sk_buff *skb; - skb = child->dequeue(q->qdisc); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); @@ -599,12 +609,12 @@ static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct sfb_sched_data *q = qdisc_priv(sch); struct tc_sfb_xstats st = { - .earlydrop = q->stats.earlydrop, - .penaltydrop = q->stats.penaltydrop, - .bucketdrop = q->stats.bucketdrop, - .queuedrop = q->stats.queuedrop, - .childdrop = q->stats.childdrop, - .marked = q->stats.marked, + .earlydrop = READ_ONCE(q->stats.earlydrop), + .penaltydrop = READ_ONCE(q->stats.penaltydrop), + .bucketdrop = READ_ONCE(q->stats.bucketdrop), + .queuedrop = READ_ONCE(q->stats.queuedrop), + .childdrop = READ_ONCE(q->stats.childdrop), + .marked = READ_ONCE(q->stats.marked), }; st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c3f3181dba54..f39822babf88 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -225,7 +225,8 @@ static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = q->slots[x].qlen--; + d = q->slots[x].qlen; + WRITE_ONCE(q->slots[x].qlen, d - 1); if (n == p && q->cur_depth == d) q->cur_depth--; sfq_link(q, x); @@ -238,7 +239,8 @@ static inline void sfq_inc(struct 
sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = ++q->slots[x].qlen; + d = q->slots[x].qlen + 1; + WRITE_ONCE(q->slots[x].qlen, d); if (q->cur_depth < d) q->cur_depth = d; sfq_link(q, x); @@ -298,7 +300,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - slot->backlog -= len; + WRITE_ONCE(slot->backlog, slot->backlog - len); sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -314,7 +316,7 @@ drop: q->tail = NULL; /* no more active slots */ else q->tail->next = slot->next; - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); goto drop; } @@ -364,10 +366,10 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; - slot->backlog = 0; /* should already be 0 anyway... */ + WRITE_ONCE(slot->backlog, 0); /* should already be 0 anyway... 
*/ red_set_vars(&slot->vars); goto enqueue; } @@ -426,7 +428,7 @@ congestion_drop: head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; - slot->backlog -= delta; + WRITE_ONCE(slot->backlog, slot->backlog - delta); qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); @@ -436,7 +438,7 @@ congestion_drop: enqueue: qdisc_qstats_backlog_inc(sch, skb); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ @@ -452,7 +454,7 @@ enqueue: */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; @@ -489,7 +491,7 @@ next_slot: slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; - slot->allot += q->quantum; + WRITE_ONCE(slot->allot, slot->allot + q->quantum); goto next_slot; } skb = slot_dequeue_head(slot); @@ -497,10 +499,10 @@ next_slot: qdisc_bstats_update(sch, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); - slot->backlog -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog - qdisc_pkt_len(skb)); /* Is the slot empty? 
*/ if (slot->qlen == 0) { - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); next_a = slot->next; if (a == next_a) { q->tail = NULL; /* no more active slots */ @@ -508,7 +510,7 @@ next_slot: } q->tail->next = next_a; } else { - slot->allot -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->allot, slot->allot - qdisc_pkt_len(skb)); } return skb; } @@ -549,9 +551,9 @@ static void sfq_rehash(struct Qdisc *sch) sfq_dec(q, i); __skb_queue_tail(&list, skb); } - slot->backlog = 0; + WRITE_ONCE(slot->backlog, 0); red_set_vars(&slot->vars); - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); } q->tail = NULL; @@ -570,7 +572,7 @@ drop: dropped++; continue; } - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; } @@ -581,7 +583,7 @@ drop: slot->vars.qavg = red_calc_qavg(q->red_parms, &slot->vars, slot->backlog); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ @@ -591,7 +593,7 @@ drop: q->tail->next = x; } q->tail = slot; - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } } sch->q.qlen -= dropped; @@ -905,16 +907,16 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl - 1]; + sfq_index idx = READ_ONCE(q->ht[cl - 1]); struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot; - qs.qlen = slot->qlen; - qs.backlog = slot->backlog; + xstats.allot = READ_ONCE(slot->allot); + qs.qlen = READ_ONCE(slot->qlen); + qs.backlog = READ_ONCE(slot->backlog); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; @@ -930,7 +932,7 @@ static void sfq_walk(struct Qdisc *sch, 
struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT) { + if (READ_ONCE(q->ht[i]) == SFQ_EMPTY_SLOT) { arg->count++; continue; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8e3752811950..45245157e00a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -634,7 +634,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, queue = skb_get_queue_mapping(skb); child = q->qdiscs[queue]; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return qdisc_drop(skb, sch, to_free); if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) { @@ -717,7 +717,7 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq, int len; u8 tc; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return NULL; if (TXTIME_ASSIST_IS_ENABLED(q->flags)) @@ -972,11 +972,12 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer) } if (should_change_schedules(admin, oper, end_time)) { - /* Set things so the next time this runs, the new - * schedule runs. - */ - end_time = sched_base_time(admin); switch_schedules(q, &admin, &oper); + /* After changing schedules, the next entry is the first one + * in the new schedule, with a pre-calculated end_time. 
+ */ + next = list_first_entry(&oper->entries, struct sched_entry, list); + end_time = next->end_time; } next->end_time = end_time; @@ -2183,6 +2184,9 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (!dev_queue) return -EINVAL; + if (!new) + new = &noop_qdisc; + if (dev->flags & IFF_UP) dev_deactivate(dev, false); @@ -2196,14 +2200,14 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); - if (new) + if (new != &noop_qdisc) qdisc_refcount_inc(new); - if (*old) + if (*old && *old != &noop_qdisc) qdisc_put(*old); } q->qdiscs[cl - 1] = new; - if (new) + if (new != &noop_qdisc) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7b823d759141..8e89a870780c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( /* Tag the variable length parameters. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); + if (asoc->state >= SCTP_STATE_ESTABLISHED) { + /* Discard INIT matching peer vtag after handshake completion (stale INIT). */ + if (ntohl(chunk->subh.init_hdr->init_tag) == asoc->peer.i.init_tag) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* Verify the INIT chunk before processing it. 
*/ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d2665bbd41a2..1d2568bb6bc2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1986,6 +1986,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_unlock; iov_iter_revert(&msg->msg_iter, err); + + /* sctp_sendmsg_to_asoc() may have released the socket + * lock (sctp_wait_for_sndbuf), during which other + * associations on ep->asocs could have been peeled + * off or freed. @asoc itself is revalidated by the + * base.dead and base.sk checks in sctp_wait_for_sndbuf, + * so re-derive the cached cursor from it. + */ + tmp = list_next_entry(asoc, asocs); } goto out_unlock; @@ -4855,8 +4864,9 @@ static struct sock *sctp_clone_sock(struct sock *sk, if (!newsk) return ERR_PTR(err); - /* sk_clone() sets refcnt to 2 */ + /* sk_clone() sets refcnt to 2 and increments sockets_allocated */ sock_put(newsk); + sk_sockets_allocated_dec(newsk); newinet = inet_sk(newsk); newsp = sctp_sk(newsk); @@ -7033,7 +7043,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, /* See if the user provided enough room for all the data */ num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); - if (len < num_chunks) + if (len < sizeof(struct sctp_authchunks) + num_chunks) return -EINVAL; if (copy_to_user(to, ch->chunks, num_chunks)) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 94bc9c7382ea..b1c65110f04d 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -21,6 +21,8 @@ #define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK +static_assert(NET_SHAPER_ID_UNSPEC == NET_SHAPER_MAX_HANDLE_ID + 1); + struct net_shaper_hierarchy { struct xarray shapers; }; @@ -90,6 +92,12 @@ static int net_shaper_handle_size(void) nla_total_size(sizeof(u32))); } +static int net_shaper_group_reply_size(void) +{ + return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */ + 
net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */ +} + static int net_shaper_fill_binding(struct sk_buff *msg, const struct net_shaper_binding *binding, u32 type) @@ -275,11 +283,13 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle, parent->id = 0; } -/* - * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as - * it's cleared by xa_store(). +/* MARK_0 is already in use due to XA_FLAGS_ALLOC. The VALID mark is set on + * an entry only after the device-side configuration has completed + * successfully (see net_shaper_commit()). Lookups and dumps must filter on + * this mark to avoid exposing tentative entries inserted by + * net_shaper_pre_insert() while the driver call is still in flight. */ -#define NET_SHAPER_NOT_VALID XA_MARK_1 +#define NET_SHAPER_VALID XA_MARK_1 static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, @@ -289,10 +299,14 @@ net_shaper_lookup(struct net_shaper_binding *binding, struct net_shaper_hierarchy *hierarchy; hierarchy = net_shaper_hierarchy_rcu(binding); - if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID)) + if (!hierarchy || !xa_get_mark(&hierarchy->shapers, index, + NET_SHAPER_VALID)) return NULL; + /* Pairs with smp_wmb() in net_shaper_commit(): if the entry is + * valid, its contents must be visible too. + */ + smp_rmb(); return xa_load(&hierarchy->shapers, index); } @@ -348,7 +362,7 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, handle->id == NET_SHAPER_ID_UNSPEC) { u32 min, max; - handle->id = NET_SHAPER_ID_MASK - 1; + handle->id = NET_SHAPER_MAX_HANDLE_ID; max = net_shaper_handle_to_index(handle); handle->id = 0; min = net_shaper_handle_to_index(handle); @@ -370,13 +384,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, goto free_id; } - /* Mark 'tentative' shaper inside the hierarchy container. - * xa_set_mark is a no-op if the previous store fails. 
+ /* Insert as 'tentative' (no VALID mark). The mark will be set by + * net_shaper_commit() once the driver-side configuration succeeds. */ - xa_lock(&hierarchy->shapers); - prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); - xa_unlock(&hierarchy->shapers); + prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); if (xa_err(prev)) { NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); kfree_rcu(cur, rcu); @@ -413,9 +424,9 @@ static void net_shaper_commit(struct net_shaper_binding *binding, /* Successful update: drop the tentative mark * and update the hierarchy container. */ - __xa_clear_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID); *cur = shapers[i]; + smp_wmb(); + __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); } xa_unlock(&hierarchy->shapers); } @@ -431,8 +442,9 @@ static void net_shaper_rollback(struct net_shaper_binding *binding) return; xa_lock(&hierarchy->shapers); - xa_for_each_marked(&hierarchy->shapers, index, cur, - NET_SHAPER_NOT_VALID) { + xa_for_each(&hierarchy->shapers, index, cur) { + if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_VALID)) + continue; __xa_erase(&hierarchy->shapers, index); kfree(cur); } @@ -465,10 +477,21 @@ static int net_shaper_parse_handle(const struct nlattr *attr, * shaper (any other value). 
*/ id_attr = tb[NET_SHAPER_A_HANDLE_ID]; - if (id_attr) + if (id_attr) { id = nla_get_u32(id_attr); - else if (handle->scope == NET_SHAPER_SCOPE_NODE) + } else if (handle->scope == NET_SHAPER_SCOPE_NODE) { id = NET_SHAPER_ID_UNSPEC; + } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) { + NL_SET_ERR_ATTR_MISS(info->extack, attr, + NET_SHAPER_A_HANDLE_ID); + return -EINVAL; + } + + if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) { + NL_SET_ERR_MSG_ATTR(info->extack, id_attr, + "Netdev scope is a singleton, must use ID 0"); + return -EINVAL; + } handle->id = id; return 0; @@ -836,7 +859,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, goto out_unlock; for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, - U32_MAX, XA_PRESENT)); ctx->start_index++) { + U32_MAX, NET_SHAPER_VALID)); + ctx->start_index++) { + /* Pairs with smp_wmb() in net_shaper_commit(): the entry + * is marked VALID, so its contents must be visible too. + */ + smp_rmb(); ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; @@ -932,6 +960,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a, return memcmp(a, b, sizeof(*a)); } +static int net_shaper_parse_leaves(struct net_shaper_binding *binding, + struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *leaves, + int leaves_count) +{ + struct nlattr *attr; + int i, j, ret, rem; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + return -EINVAL; + + ret = net_shaper_parse_leaf(binding, attr, info, + node, &leaves[i]); + if (ret) + return ret; + + /* Reject duplicates */ + for (j = 0; j < i; j++) { + if (net_shaper_handle_cmp(&leaves[i].handle, + &leaves[j].handle)) + continue; + + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "Duplicate leaf shaper %d:%d", + leaves[i].handle.scope, + leaves[i].handle.id); + return -EINVAL; + } + + i++; + } + + return 0; 
+} + static int net_shaper_parent_from_leaves(int leaves_count, const struct net_shaper *leaves, struct net_shaper *node, @@ -964,15 +1032,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding, int i, ret; if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + struct net_shaper *cur = NULL; + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; - if (!new_node && !net_shaper_lookup(binding, &node->handle)) { - /* The related attribute is not available when - * reaching here from the delete() op. - */ - NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", - node->handle.scope, node->handle.id); - return -ENOENT; + if (!new_node) { + cur = net_shaper_lookup(binding, &node->handle); + if (!cur) { + /* The related attribute is not available + * when reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, + "Node shaper %d:%d does not exist", + node->handle.scope, + node->handle.id); + return -ENOENT; + } } /* When unspecified, the node parent scope is inherited from @@ -986,6 +1061,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding, return ret; } + if (cur && net_shaper_handle_cmp(&cur->parent, + &node->parent)) { + NL_SET_ERR_MSG_FMT(extack, + "Cannot reparent node shaper %d:%d", + node->handle.scope, + node->handle.id); + return -EOPNOTSUPP; + } + } else { net_shaper_default_parent(&node->handle, &node->parent); } @@ -1162,7 +1246,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding, free_msg: /* Should never happen as msg is pre-allocated with enough space. 
*/ WARN_ONCE(true, "calculated message payload length (%d)", - net_shaper_handle_size()); + net_shaper_group_reply_size()); nlmsg_free(msg); return -EMSGSIZE; } @@ -1172,10 +1256,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) struct net_shaper **old_nodes, *leaves, node = {}; struct net_shaper_hierarchy *hierarchy; struct net_shaper_binding *binding; - int i, ret, rem, leaves_count; + int i, ret, leaves_count; int old_nodes_count = 0; struct sk_buff *msg; - struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) return -EINVAL; @@ -1203,26 +1286,19 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_leaves; - i = 0; - nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, - genlmsg_data(info->genlhdr), - genlmsg_len(info->genlhdr), rem) { - if (WARN_ON_ONCE(i >= leaves_count)) - goto free_leaves; - - ret = net_shaper_parse_leaf(binding, attr, info, - &node, &leaves[i]); - if (ret) - goto free_leaves; - i++; - } + ret = net_shaper_parse_leaves(binding, info, &node, + leaves, leaves_count); + if (ret) + goto free_leaves; /* Prepare the msg reply in advance, to avoid device operation * rollback on allocation failure. 
*/ - msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); - if (!msg) + msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; goto free_leaves; + } hierarchy = net_shaper_hierarchy_setup(binding); if (!hierarchy) { diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c index 9b29be3ef19a..76eff85ec66d 100644 --- a/net/shaper/shaper_nl_gen.c +++ b/net/shaper/shaper_nl_gen.c @@ -11,10 +11,15 @@ #include <uapi/linux/net_shaper.h> +/* Integer value ranges */ +static const struct netlink_range_validation net_shaper_a_handle_id_range = { + .max = NET_SHAPER_MAX_HANDLE_ID, +}; + /* Common nested types */ const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = { [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), - [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &net_shaper_a_handle_id_range), }; const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = { diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h index 42c46c52c775..2406652a9014 100644 --- a/net/shaper/shaper_nl_gen.h +++ b/net/shaper/shaper_nl_gen.h @@ -12,6 +12,8 @@ #include <uapi/linux/net_shaper.h> +#define NET_SHAPER_MAX_HANDLE_ID 67108862 + /* Common nested types */ extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1]; extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1]; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1a565095376a..dffbd529762d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { + if (ini->ism_dev[i] && + ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } @@ -1628,12 +1629,8 @@ static void 
smc_connect_work(struct work_struct *work) lock_sock(&smc->sk); if (rc != 0 || smc->sk.sk_err) { smc->sk.sk_state = SMC_CLOSED; - if (rc == -EPIPE || rc == -EAGAIN) - smc->sk.sk_err = EPIPE; - else if (rc == -ECONNREFUSED) - smc->sk.sk_err = ECONNREFUSED; - else if (signal_pending(current)) - smc->sk.sk_err = -sock_intr_errno(timeo); + if (!smc->sk.sk_err) + smc->sk.sk_err = (rc == -EAGAIN) ? EPIPE : -rc; sock_put(&smc->sk); /* passive closing */ goto out; } @@ -3058,18 +3055,17 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname, smc = smc_sk(sk); + /* pre-fetch user data outside the lock */ + if (optname == SMC_LIMIT_HS) { + if (optlen < sizeof(int)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(int))) + return -EFAULT; + } + lock_sock(sk); switch (optname) { case SMC_LIMIT_HS: - if (optlen < sizeof(int)) { - rc = -EINVAL; - break; - } - if (copy_from_sockptr(&val, optval, sizeof(int))) { - rc = -EFAULT; - break; - } - smc->limit_smc_hs = !!val; rc = 0; break; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index c38fc7bf0a7e..014d527d5462 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -788,8 +788,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, dclc = (struct smc_clc_msg_decline *)clcm; reason_code = SMC_CLC_DECL_PEERDECL; smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); - if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & - SMC_FIRST_CONTACT_MASK) { + if ((dclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK) && + smc->conn.lgr) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate_sched(smc->conn.lgr); } diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index a9a6e3c1113a..53da84f57fd6 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event, __field(const void *, smc) __field(u64, net_cookie) __field(size_t, len) - __string(name, smc->conn.lnk->ibname) + __string(name, smc->conn.lnk ? 
smc->conn.lnk->ibname : "") ), TP_fast_assign( diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c index a5bff02cd7ba..dde1ee934d0d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_test.c +++ b/net/sunrpc/auth_gss/gss_krb5_test.c @@ -63,10 +63,11 @@ static void kdf_case(struct kunit *test) KUNIT_ASSERT_EQ(test, err, 0); /* Assert */ - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - derivedkey.data, derivedkey.len), 0, - "key mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + derivedkey.data, + derivedkey.len, + "key mismatch"); } static void checksum_case(struct kunit *test) @@ -111,10 +112,11 @@ static void checksum_case(struct kunit *test) KUNIT_ASSERT_EQ(test, err, 0); /* Assert */ - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - checksum.data, checksum.len), 0, - "checksum mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + checksum.data, + checksum.len, + "checksum mismatch"); crypto_free_ahash(tfm); } @@ -314,10 +316,11 @@ static void rfc3961_nfold_case(struct kunit *test) param->expected_result->len * 8, result); /* Assert */ - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - result, param->expected_result->len), 0, - "result mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + result, + param->expected_result->len, + "result mismatch"); } static struct kunit_case rfc3961_test_cases[] = { @@ -569,14 +572,16 @@ static void rfc3962_encrypt_case(struct kunit *test) KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len, buf.len, "ciphertext length mismatch"); - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - text, param->expected_result->len), 0, - "ciphertext mismatch"); - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->next_iv->data, iv, - param->next_iv->len), 0, - "IV mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + text, + param->expected_result->len, + 
"ciphertext mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->next_iv->data, + iv, + param->next_iv->len, + "IV mismatch"); crypto_free_sync_skcipher(cts_tfm); crypto_free_sync_skcipher(cbc_tfm); @@ -1194,15 +1199,17 @@ static void rfc6803_encrypt_case(struct kunit *test) KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len, buf.len + checksum.len, "ciphertext length mismatch"); - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - buf.head[0].iov_base, buf.len), 0, - "encrypted result mismatch"); - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data + - (param->expected_result->len - checksum.len), - checksum.data, checksum.len), 0, - "HMAC mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + buf.head[0].iov_base, + buf.len, + "encrypted result mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data + + (param->expected_result->len - checksum.len), + checksum.data, + checksum.len, + "HMAC mismatch"); crypto_free_ahash(ahash_tfm); crypto_free_sync_skcipher(cts_tfm); @@ -1687,15 +1694,16 @@ static void rfc8009_encrypt_case(struct kunit *test) KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len, buf.len, "ciphertext length mismatch"); - KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->expected_result->data, - buf.head[0].iov_base, - param->expected_result->len), 0, - "ciphertext mismatch"); - KUNIT_EXPECT_EQ_MSG(test, memcmp(param->expected_hmac->data, - checksum.data, - checksum.len), 0, - "HMAC mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_result->data, + buf.head[0].iov_base, + param->expected_result->len, + "ciphertext mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->expected_hmac->data, + checksum.data, + checksum.len, + "HMAC mismatch"); crypto_free_ahash(ahash_tfm); crypto_free_sync_skcipher(cts_tfm); @@ -1826,10 +1834,11 @@ static void encrypt_selftest_case(struct kunit *test) KUNIT_EXPECT_EQ_MSG(test, param->plaintext->len, buf.len, "length mismatch"); - 
KUNIT_EXPECT_EQ_MSG(test, - memcmp(param->plaintext->data, - buf.head[0].iov_base, buf.len), 0, - "plaintext mismatch"); + KUNIT_EXPECT_MEMEQ_MSG(test, + param->plaintext->data, + buf.head[0].iov_base, + buf.len, + "plaintext mismatch"); crypto_free_sync_skcipher(cts_tfm); crypto_free_sync_skcipher(cbc_tfm); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ef8b7e8b1e9c..7081c1214e6c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -134,11 +134,11 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, return tmp; } + cache_get(new); hlist_add_head_rcu(&new->cache_list, head); detail->entries++; if (detail->nextcheck > new->expiry_time) detail->nextcheck = new->expiry_time + 1; - cache_get(new); spin_unlock(&detail->hash_lock); if (freeme) @@ -233,9 +233,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, spin_lock(&detail->hash_lock); cache_entry_update(detail, tmp, new); - hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); - detail->entries++; cache_get(tmp); + hlist_add_head_rcu(&tmp->cache_list, &detail->hash_table[hash]); + detail->entries++; cache_fresh_locked(tmp, new->expiry_time, detail); cache_fresh_locked(old, 0, detail); spin_unlock(&detail->hash_lock); @@ -399,7 +399,11 @@ static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { spin_lock_init(&cd->hash_lock); - INIT_LIST_HEAD(&cd->queue); + INIT_LIST_HEAD(&cd->requests); + INIT_LIST_HEAD(&cd->readers); + spin_lock_init(&cd->queue_lock); + init_waitqueue_head(&cd->queue_wait); + cd->next_seqno = 0; spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; @@ -794,31 +798,20 @@ void cache_clean_deferred(void *owner) * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. - * - * Implemented by linked list of requests. Each open file has - * a ->private that also exists in this list. 
New requests are added - * to the end and may wakeup and preceding readers. - * New readers are added to the head. If, on read, an item is found with - * CACHE_UPCALLING clear, we free it from the list. - * */ -static DEFINE_SPINLOCK(queue_lock); - -struct cache_queue { - struct list_head list; - int reader; /* if 0, then request */ -}; struct cache_request { - struct cache_queue q; + struct list_head list; struct cache_head *item; - char * buf; + char *buf; int len; int readers; + u64 seqno; }; struct cache_reader { - struct cache_queue q; + struct list_head list; int offset; /* if non-0, we have a refcnt on next request */ + u64 next_seqno; }; static int cache_request(struct cache_detail *detail, @@ -833,6 +826,17 @@ static int cache_request(struct cache_detail *detail, return PAGE_SIZE - len; } +static struct cache_request * +cache_next_request(struct cache_detail *cd, u64 seqno) +{ + struct cache_request *rq; + + list_for_each_entry(rq, &cd->requests, list) + if (rq->seqno >= seqno) + return rq; + return NULL; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { @@ -847,25 +851,18 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, inode_lock(inode); /* protect against multiple concurrent * readers on this file */ again: - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); /* need to find next request */ - while (rp->q.list.next != &cd->queue && - list_entry(rp->q.list.next, struct cache_queue, list) - ->reader) { - struct list_head *next = rp->q.list.next; - list_move(&rp->q.list, next); - } - if (rp->q.list.next == &cd->queue) { - spin_unlock(&queue_lock); + rq = cache_next_request(cd, rp->next_seqno); + if (!rq) { + spin_unlock(&cd->queue_lock); inode_unlock(inode); WARN_ON_ONCE(rp->offset); return 0; } - rq = container_of(rp->q.list.next, struct cache_request, q.list); - WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; - 
spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); if (rq->len == 0) { err = cache_request(cd, rq); @@ -876,9 +873,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; - spin_lock(&queue_lock); - list_move(&rp->q.list, &rq->q.list); - spin_unlock(&queue_lock); + rp->next_seqno = rq->seqno + 1; } else { if (rp->offset + count > rq->len) count = rq->len - rp->offset; @@ -888,26 +883,24 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rp->offset += count; if (rp->offset >= rq->len) { rp->offset = 0; - spin_lock(&queue_lock); - list_move(&rp->q.list, &rq->q.list); - spin_unlock(&queue_lock); + rp->next_seqno = rq->seqno + 1; } err = 0; } out: if (rp->offset == 0) { /* need to release rq */ - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); rq->readers--; if (rq->readers == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { - list_del(&rq->q.list); - spin_unlock(&queue_lock); + list_del(&rq->list); + spin_unlock(&cd->queue_lock); cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); } if (err == -EAGAIN) goto again; @@ -971,16 +964,13 @@ out: return ret; } -static DECLARE_WAIT_QUEUE_HEAD(queue_wait); - static __poll_t cache_poll(struct file *filp, poll_table *wait, struct cache_detail *cd) { __poll_t mask; struct cache_reader *rp = filp->private_data; - struct cache_queue *cq; - poll_wait(filp, &queue_wait, wait); + poll_wait(filp, &cd->queue_wait, wait); /* alway allow write */ mask = EPOLLOUT | EPOLLWRNORM; @@ -988,15 +978,11 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, if (!rp) return mask; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) - if (!cq->reader) { - mask |= EPOLLIN | EPOLLRDNORM; - break; - } - 
spin_unlock(&queue_lock); + if (cache_next_request(cd, rp->next_seqno)) + mask |= EPOLLIN | EPOLLRDNORM; + spin_unlock(&cd->queue_lock); return mask; } @@ -1006,25 +992,20 @@ static int cache_ioctl(struct inode *ino, struct file *filp, { int len = 0; struct cache_reader *rp = filp->private_data; - struct cache_queue *cq; + struct cache_request *rq; if (cmd != FIONREAD || !rp) return -EINVAL; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); /* only find the length remaining in current request, * or the length of the next request */ - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) - if (!cq->reader) { - struct cache_request *cr = - container_of(cq, struct cache_request, q); - len = cr->len - rp->offset; - break; - } - spin_unlock(&queue_lock); + rq = cache_next_request(cd, rp->next_seqno); + if (rq) + len = rq->len - rp->offset; + spin_unlock(&cd->queue_lock); return put_user(len, (int __user *)arg); } @@ -1044,11 +1025,11 @@ static int cache_open(struct inode *inode, struct file *filp, return -ENOMEM; } rp->offset = 0; - rp->q.reader = 1; + rp->next_seqno = 0; - spin_lock(&queue_lock); - list_add(&rp->q.list, &cd->queue); - spin_unlock(&queue_lock); + spin_lock(&cd->queue_lock); + list_add(&rp->list, &cd->readers); + spin_unlock(&cd->queue_lock); } if (filp->f_mode & FMODE_WRITE) atomic_inc(&cd->writers); @@ -1064,29 +1045,24 @@ static int cache_release(struct inode *inode, struct file *filp, if (rp) { struct cache_request *rq = NULL; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); if (rp->offset) { - struct cache_queue *cq; - for (cq = &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, - struct cache_queue, list)) - if (!cq->reader) { - struct cache_request *cr = - container_of(cq, - struct cache_request, q); - cr->readers--; - if (cr->readers == 0 && - !test_bit(CACHE_PENDING, - &cr->item->flags)) { - list_del(&cr->q.list); - rq = cr; - } - break; + struct cache_request *cr; + + 
cr = cache_next_request(cd, rp->next_seqno); + if (cr) { + cr->readers--; + if (cr->readers == 0 && + !test_bit(CACHE_PENDING, + &cr->item->flags)) { + list_del(&cr->list); + rq = cr; } + } rp->offset = 0; } - list_del(&rp->q.list); - spin_unlock(&queue_lock); + list_del(&rp->list); + spin_unlock(&cd->queue_lock); if (rq) { cache_put(rq->item, cd); @@ -1109,27 +1085,24 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq, *tmp; - struct cache_request *cr; + struct cache_request *cr, *tmp; LIST_HEAD(dequeued); - spin_lock(&queue_lock); - list_for_each_entry_safe(cq, tmp, &detail->queue, list) - if (!cq->reader) { - cr = container_of(cq, struct cache_request, q); - if (cr->item != ch) - continue; - if (test_bit(CACHE_PENDING, &ch->flags)) - /* Lost a race and it is pending again */ - break; - if (cr->readers != 0) - continue; - list_move(&cr->q.list, &dequeued); - } - spin_unlock(&queue_lock); + spin_lock(&detail->queue_lock); + list_for_each_entry_safe(cr, tmp, &detail->requests, list) { + if (cr->item != ch) + continue; + if (test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; + if (cr->readers != 0) + continue; + list_move(&cr->list, &dequeued); + } + spin_unlock(&detail->queue_lock); while (!list_empty(&dequeued)) { - cr = list_entry(dequeued.next, struct cache_request, q.list); - list_del(&cr->q.list); + cr = list_entry(dequeued.next, struct cache_request, list); + list_del(&cr->list); cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); @@ -1247,20 +1220,20 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) return -EAGAIN; } - crq->q.reader = 0; crq->buf = buf; crq->len = 0; crq->readers = 0; - spin_lock(&queue_lock); + spin_lock(&detail->queue_lock); if (test_bit(CACHE_PENDING, &h->flags)) { crq->item = cache_get(h); - list_add_tail(&crq->q.list, &detail->queue); + 
crq->seqno = detail->next_seqno++; + list_add_tail(&crq->list, &detail->requests); trace_cache_entry_upcall(detail, h); } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; - spin_unlock(&queue_lock); - wake_up(&queue_wait); + spin_unlock(&detail->queue_lock); + wake_up(&detail->queue_wait); if (ret == -EAGAIN) { kfree(buf); kfree(crq); @@ -1378,18 +1351,14 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; - n &= ~((1LL<<32) - 1); - do { - hash++; - n += 1LL<<32; - } while(hash < cd->hash_size && - hlist_empty(&cd->hash_table[hash])); - if (hash >= cd->hash_size) - return NULL; - *pos = n+1; - return hlist_entry_safe(rcu_dereference_raw( + ch = NULL; + while (!ch && ++hash < cd->hash_size) + ch = hlist_entry_safe(rcu_dereference( hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); + + *pos = ((long long)hash << 32) + 1; + return ch; } static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) @@ -1398,29 +1367,29 @@ static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) int hash = (*pos >> 32); struct cache_detail *cd = m->private; - if (p == SEQ_START_TOKEN) + if (p == SEQ_START_TOKEN) { hash = 0; - else if (ch->cache_list.next == NULL) { - hash++; - *pos += 1LL<<32; - } else { - ++*pos; - return hlist_entry_safe(rcu_dereference_raw( - hlist_next_rcu(&ch->cache_list)), - struct cache_head, cache_list); + ch = NULL; } - *pos &= ~((1LL<<32) - 1); - while (hash < cd->hash_size && - hlist_empty(&cd->hash_table[hash])) { + while (hash < cd->hash_size) { + if (ch) + ch = hlist_entry_safe( + rcu_dereference( + hlist_next_rcu(&ch->cache_list)), + struct cache_head, cache_list); + else + ch = hlist_entry_safe( + rcu_dereference( + hlist_first_rcu(&cd->hash_table[hash])), + struct cache_head, cache_list); + if (ch) { + ++*pos; + return ch; + } hash++; - *pos += 1LL<<32; + *pos = (long long)hash << 
32; } - if (hash >= cd->hash_size) - return NULL; - ++*pos; - return hlist_entry_safe(rcu_dereference_raw( - hlist_first_rcu(&cd->hash_table[hash])), - struct cache_head, cache_list); + return NULL; } void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d8ccb8e4b5c2..576fa42e7abf 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -638,13 +638,25 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { rqstp->rq_maxpages = svc_serv_maxpages(serv); - /* rq_pages' last entry is NULL for historical reasons. */ + /* +1 for a NULL sentinel readable by nfsd_splice_actor() */ rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1, sizeof(struct page *), GFP_KERNEL, node); if (!rqstp->rq_pages) return false; + /* +1 for a NULL sentinel at rq_page_end (see svc_rqst_replace_page) */ + rqstp->rq_respages = kcalloc_node(rqstp->rq_maxpages + 1, + sizeof(struct page *), + GFP_KERNEL, node); + if (!rqstp->rq_respages) { + kfree(rqstp->rq_pages); + rqstp->rq_pages = NULL; + return false; + } + + rqstp->rq_pages_nfree = rqstp->rq_maxpages; + rqstp->rq_next_page = rqstp->rq_respages + rqstp->rq_maxpages; return true; } @@ -656,10 +668,19 @@ svc_release_buffer(struct svc_rqst *rqstp) { unsigned long i; - for (i = 0; i < rqstp->rq_maxpages; i++) - if (rqstp->rq_pages[i]) - put_page(rqstp->rq_pages[i]); - kfree(rqstp->rq_pages); + if (rqstp->rq_pages) { + for (i = 0; i < rqstp->rq_maxpages; i++) + if (rqstp->rq_pages[i]) + put_page(rqstp->rq_pages[i]); + kfree(rqstp->rq_pages); + } + + if (rqstp->rq_respages) { + for (i = 0; i < rqstp->rq_maxpages; i++) + if (rqstp->rq_respages[i]) + put_page(rqstp->rq_respages[i]); + kfree(rqstp->rq_respages); + } } static void @@ -934,11 +955,11 @@ svc_set_num_threads(struct svc_serv *serv, unsigned int min_threads, EXPORT_SYMBOL_GPL(svc_set_num_threads); /** - * svc_rqst_replace_page - Replace one page in rq_pages[] + * svc_rqst_replace_page - Replace one page 
in rq_respages[] * @rqstp: svc_rqst with pages to replace * @page: replacement page * - * When replacing a page in rq_pages, batch the release of the + * When replacing a page in rq_respages, batch the release of the * replaced pages to avoid hammering the page allocator. * * Return values: @@ -947,19 +968,16 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); */ bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) { - struct page **begin = rqstp->rq_pages; - struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages]; + struct page **begin = rqstp->rq_respages; + struct page **end = rqstp->rq_page_end; if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { trace_svc_replace_page_err(rqstp); return false; } - if (*rqstp->rq_next_page) { - if (!folio_batch_add(&rqstp->rq_fbatch, - page_folio(*rqstp->rq_next_page))) - __folio_batch_release(&rqstp->rq_fbatch); - } + if (*rqstp->rq_next_page) + svc_rqst_page_release(rqstp, *rqstp->rq_next_page); get_page(page); *(rqstp->rq_next_page++) = page; @@ -971,18 +989,24 @@ EXPORT_SYMBOL_GPL(svc_rqst_replace_page); * svc_rqst_release_pages - Release Reply buffer pages * @rqstp: RPC transaction context * - * Release response pages that might still be in flight after - * svc_send, and any spliced filesystem-owned pages. + * Release response pages in the range [rq_respages, rq_next_page). + * NULL entries in this range are skipped, allowing transports to + * transfer pages to a send context before this function runs. 
*/ void svc_rqst_release_pages(struct svc_rqst *rqstp) { - int i, count = rqstp->rq_next_page - rqstp->rq_respages; - - if (count) { - release_pages(rqstp->rq_respages, count); - for (i = 0; i < count; i++) - rqstp->rq_respages[i] = NULL; + struct page **pp; + + for (pp = rqstp->rq_respages; pp < rqstp->rq_next_page; pp++) { + if (*pp) { + if (!folio_batch_add(&rqstp->rq_fbatch, + page_folio(*pp))) + __folio_batch_release(&rqstp->rq_fbatch); + *pp = NULL; + } } + if (rqstp->rq_fbatch.nr) + __folio_batch_release(&rqstp->rq_fbatch); } /** diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 56a663b8939f..b16e710926c1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -650,14 +650,13 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -static bool svc_alloc_arg(struct svc_rqst *rqstp) +static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages, + unsigned long npages) { - struct xdr_buf *arg = &rqstp->rq_arg; - unsigned long pages, filled, ret; + unsigned long filled, ret; - pages = rqstp->rq_maxpages; - for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages); + for (filled = 0; filled < npages; filled = ret) { + ret = alloc_pages_bulk(GFP_KERNEL, npages, pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; @@ -667,11 +666,40 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return false; } - trace_svc_alloc_arg_err(pages, ret); + trace_svc_alloc_arg_err(npages, ret); memalloc_retry_wait(GFP_KERNEL); } - rqstp->rq_page_end = &rqstp->rq_pages[pages]; - rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ + return true; +} + +static bool svc_alloc_arg(struct svc_rqst *rqstp) +{ + struct xdr_buf *arg = &rqstp->rq_arg; + unsigned long pages, nfree; + + pages = rqstp->rq_maxpages; + + nfree = rqstp->rq_pages_nfree; + if (nfree) { + if (!svc_fill_pages(rqstp, rqstp->rq_pages, nfree)) + 
return false; + rqstp->rq_pages_nfree = 0; + } + + if (WARN_ON_ONCE(rqstp->rq_next_page < rqstp->rq_respages)) + return false; + nfree = rqstp->rq_next_page - rqstp->rq_respages; + if (nfree) { + if (!svc_fill_pages(rqstp, rqstp->rq_respages, nfree)) + return false; + } + + rqstp->rq_next_page = rqstp->rq_respages; + rqstp->rq_page_end = &rqstp->rq_respages[pages]; + /* svc_rqst_replace_page() dereferences *rq_next_page even + * at rq_page_end; NULL prevents releasing a garbage page. + */ + rqstp->rq_page_end[0] = NULL; /* Make arg->head point to first page and arg->pages point to rest */ arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); @@ -1277,7 +1305,6 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_addrlen = dr->addrlen; /* Save off transport header len in case we get deferred again */ rqstp->rq_daddr = dr->daddr; - rqstp->rq_respages = rqstp->rq_pages; rqstp->rq_xprt_ctxt = dr->xprt_ctxt; dr->xprt_ctxt = NULL; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f28c6076f7e8..7be3de1a1aed 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -351,8 +351,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0); - rqstp->rq_respages = &rqstp->rq_pages[i]; - rqstp->rq_next_page = rqstp->rq_respages + 1; iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen); if (seek) { @@ -677,13 +675,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; - rqstp->rq_respages = rqstp->rq_pages+1; } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - rqstp->rq_respages = rqstp->rq_pages + 1 + - DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } - rqstp->rq_next_page = rqstp->rq_respages+1; if (serv->sv_stats) serv->sv_stats->netudpcnt++; @@ -994,7 +988,7 @@ static 
size_t svc_tcp_restore_pages(struct svc_sock *svsk, npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) - put_page(rqstp->rq_pages[i]); + svc_rqst_page_release(rqstp, rqstp->rq_pages[i]); BUG_ON(svsk->sk_pages[i] == NULL); rqstp->rq_pages[i] = svsk->sk_pages[i]; svsk->sk_pages[i] = NULL; @@ -1015,6 +1009,7 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) svsk->sk_pages[i] = rqstp->rq_pages[i]; rqstp->rq_pages[i] = NULL; } + rqstp->rq_pages_nfree = npages; } static void svc_tcp_clear_pages(struct svc_sock *svsk) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4fbb57a29704..48a3618cbb29 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1663,6 +1663,22 @@ void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) } EXPORT_SYMBOL_GPL(xprt_add_backlog); +/** + * xprt_add_backlog_noncongested - queue task on backlog + * @xprt: transport whose backlog queue receives the task + * @task: task to queue + * + * Like xprt_add_backlog, but does not set XPRT_CONGESTED. + * For transports whose free_slot path does not synchronize + * with xprt_throttle_congested via reserve_lock. 
+ */ +void xprt_add_backlog_noncongested(struct rpc_xprt *xprt, + struct rpc_task *task) +{ + rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init); +} +EXPORT_SYMBOL_GPL(xprt_add_backlog_noncongested); + static bool __xprt_set_rq(struct rpc_task *task, void *data) { struct rpc_rqst *req = data; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 31434aeb8e29..7f79a0a2601e 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -244,9 +244,10 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) } ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->re_recv_batch = ep->re_max_requests >> 2; ep->re_attr.cap.max_recv_wr = ep->re_max_requests; ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; - ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH; + ep->re_attr.cap.max_recv_wr += ep->re_recv_batch; ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ ep->re_max_rdma_segs = @@ -268,10 +269,9 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) } /** - * frwr_map - Register a memory region + * frwr_map - Register a memory region from an xdr_buf cursor * @r_xprt: controlling transport - * @seg: memory region co-ordinates - * @nsegs: number of segments remaining + * @cur: cursor tracking position within the xdr_buf * @writing: true when RDMA Write will be used * @xid: XID of RPC using the registered memory * @mr: MR to fill in @@ -279,34 +279,104 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) * Prepare a REG_MR Work Request to register a memory region * for remote access via RDMA READ or RDMA WRITE. * - * Returns the next segment or a negative errno pointer. - * On success, @mr is filled in. + * Returns 0 on success (cursor advanced past consumed data, + * @mr populated) or a negative errno on failure. 
*/ -struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, - struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, __be32 xid, - struct rpcrdma_mr *mr) +int frwr_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_xdr_cursor *cur, + bool writing, __be32 xid, + struct rpcrdma_mr *mr) { struct rpcrdma_ep *ep = r_xprt->rx_ep; + const struct xdr_buf *xdrbuf = cur->xc_buf; + bool sg_gaps = ep->re_mrtype == IB_MR_TYPE_SG_GAPS; + unsigned int max_depth = ep->re_max_fr_depth; struct ib_reg_wr *reg_wr; int i, n, dma_nents; struct ib_mr *ibmr; u8 key; - if (nsegs > ep->re_max_fr_depth) - nsegs = ep->re_max_fr_depth; - for (i = 0; i < nsegs;) { - sg_set_page(&mr->mr_sg[i], seg->mr_page, - seg->mr_len, seg->mr_offset); - - ++seg; - ++i; - if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS) - continue; - if ((i < nsegs && seg->mr_offset) || - offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) - break; + i = 0; + + /* Head kvec */ + if (!(cur->xc_flags & XC_HEAD_DONE)) { + const struct kvec *head = &xdrbuf->head[0]; + + sg_set_page(&mr->mr_sg[i], + virt_to_page(head->iov_base), + head->iov_len, + offset_in_page(head->iov_base)); + cur->xc_flags |= XC_HEAD_DONE; + i++; + /* Without sg-gap support, each non-contiguous region + * must be registered as a separate MR. Returning + * here after the head kvec causes the caller to + * invoke frwr_map() again for the page list and + * tail. 
+ */ + if (!sg_gaps) + goto finish; } + + /* Page list */ + if (!(cur->xc_flags & XC_PAGES_DONE) && xdrbuf->page_len) { + unsigned int page_base, remaining; + struct page **ppages; + + remaining = xdrbuf->page_len - cur->xc_page_offset; + page_base = offset_in_page(xdrbuf->page_base + + cur->xc_page_offset); + ppages = xdrbuf->pages + + ((xdrbuf->page_base + cur->xc_page_offset) + >> PAGE_SHIFT); + + while (remaining > 0 && i < max_depth) { + unsigned int len; + + len = min_t(unsigned int, + PAGE_SIZE - page_base, remaining); + sg_set_page(&mr->mr_sg[i], *ppages, + len, page_base); + cur->xc_page_offset += len; + i++; + ppages++; + remaining -= len; + + if (!sg_gaps && remaining > 0 && + offset_in_page(page_base + len)) + goto finish; + page_base = 0; + } + if (remaining == 0) + cur->xc_flags |= XC_PAGES_DONE; + } else if (!(cur->xc_flags & XC_PAGES_DONE)) { + cur->xc_flags |= XC_PAGES_DONE; + } + + /* Tail kvec */ + if (!(cur->xc_flags & XC_TAIL_DONE) && xdrbuf->tail[0].iov_len && + i < max_depth) { + const struct kvec *tail = &xdrbuf->tail[0]; + + if (!sg_gaps && i > 0) { + struct scatterlist *prev = &mr->mr_sg[i - 1]; + + if (offset_in_page(prev->offset + prev->length) || + offset_in_page(tail->iov_base)) + goto finish; + } + sg_set_page(&mr->mr_sg[i], + virt_to_page(tail->iov_base), + tail->iov_len, + offset_in_page(tail->iov_base)); + cur->xc_flags |= XC_TAIL_DONE; + i++; + } else if (!(cur->xc_flags & XC_TAIL_DONE) && + !xdrbuf->tail[0].iov_len) { + cur->xc_flags |= XC_TAIL_DONE; + } + +finish: mr->mr_dir = rpcrdma_data_dir(writing); mr->mr_nents = i; @@ -338,15 +408,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, mr->mr_offset = ibmr->iova; trace_xprtrdma_mr_map(mr); - return seg; + return 0; out_dmamap_err: trace_xprtrdma_frwr_sgerr(mr, i); - return ERR_PTR(-EIO); + return -EIO; out_mapmr_err: trace_xprtrdma_frwr_maperr(mr, n); - return ERR_PTR(-EIO); + return -EIO; } /** @@ -669,9 +739,13 @@ void frwr_unmap_async(struct rpcrdma_xprt 
*r_xprt, struct rpcrdma_req *req) */ int frwr_wp_create(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = r_xprt->rx_ep; - struct rpcrdma_mr_seg seg; + struct ib_reg_wr *reg_wr; struct rpcrdma_mr *mr; + struct ib_mr *ibmr; + int dma_nents; + int ret; mr = rpcrdma_mr_get(r_xprt); if (!mr) @@ -679,11 +753,39 @@ int frwr_wp_create(struct rpcrdma_xprt *r_xprt) mr->mr_req = NULL; ep->re_write_pad_mr = mr; - seg.mr_len = XDR_UNIT; - seg.mr_page = virt_to_page(ep->re_write_pad); - seg.mr_offset = offset_in_page(ep->re_write_pad); - if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr))) - return -EIO; + sg_init_table(mr->mr_sg, 1); + sg_set_page(mr->mr_sg, virt_to_page(ep->re_write_pad), + XDR_UNIT, offset_in_page(ep->re_write_pad)); + + mr->mr_dir = DMA_FROM_DEVICE; + mr->mr_nents = 1; + dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, + mr->mr_nents, mr->mr_dir); + if (!dma_nents) { + ret = -EIO; + goto out_mr; + } + mr->mr_device = ep->re_id->device; + + ibmr = mr->mr_ibmr; + if (ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, + PAGE_SIZE) != dma_nents) { + ret = -EIO; + goto out_unmap; + } + + /* IOVA is not tagged with an XID; the write-pad is not RPC-specific. 
*/ + ib_update_fast_reg_key(ibmr, ib_inc_rkey(ibmr->rkey)); + + reg_wr = &mr->mr_regwr; + reg_wr->mr = ibmr; + reg_wr->key = ibmr->rkey; + reg_wr->access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; + + mr->mr_handle = ibmr->rkey; + mr->mr_length = ibmr->length; + mr->mr_offset = ibmr->iova; + trace_xprtrdma_mr_fastreg(mr); mr->mr_cqe.done = frwr_wc_fastreg; @@ -693,5 +795,16 @@ int frwr_wp_create(struct rpcrdma_xprt *r_xprt) mr->mr_regwr.wr.opcode = IB_WR_REG_MR; mr->mr_regwr.wr.send_flags = 0; - return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL); + ret = ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL); + if (!ret) + return 0; + +out_unmap: + frwr_mr_unmap(mr); +out_mr: + ep->re_write_pad_mr = NULL; + spin_lock(&buf->rb_lock); + rpcrdma_mr_push(mr, &buf->rb_mrs); + spin_unlock(&buf->rb_lock); + return ret; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3aac1456e23e..0e0f21974710 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -200,67 +200,30 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf) return 0; } -/* Convert @vec to a single SGL element. - * - * Returns pointer to next available SGE, and bumps the total number - * of SGEs consumed. 
- */ -static struct rpcrdma_mr_seg * -rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, - unsigned int *n) +static void +rpcrdma_xdr_cursor_init(struct rpcrdma_xdr_cursor *cur, + const struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type) { - seg->mr_page = virt_to_page(vec->iov_base); - seg->mr_offset = offset_in_page(vec->iov_base); - seg->mr_len = vec->iov_len; - ++seg; - ++(*n); - return seg; + cur->xc_buf = xdrbuf; + cur->xc_page_offset = 0; + cur->xc_flags = 0; + + if (pos != 0) + cur->xc_flags |= XC_HEAD_DONE; + if (!xdrbuf->page_len) + cur->xc_flags |= XC_PAGES_DONE; + if (type == rpcrdma_readch || type == rpcrdma_writech || + !xdrbuf->tail[0].iov_len) + cur->xc_flags |= XC_TAIL_DONE; } -/* Convert @xdrbuf into SGEs no larger than a page each. As they - * are registered, these SGEs are then coalesced into RDMA segments - * when the selected memreg mode supports it. - * - * Returns positive number of SGEs consumed, or a negative errno. - */ - -static int -rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, - unsigned int pos, enum rpcrdma_chunktype type, - struct rpcrdma_mr_seg *seg) +static bool +rpcrdma_xdr_cursor_done(const struct rpcrdma_xdr_cursor *cur) { - unsigned long page_base; - unsigned int len, n; - struct page **ppages; - - n = 0; - if (pos == 0) - seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n); - - len = xdrbuf->page_len; - ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); - page_base = offset_in_page(xdrbuf->page_base); - while (len) { - seg->mr_page = *ppages; - seg->mr_offset = page_base; - seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len); - len -= seg->mr_len; - ++ppages; - ++seg; - ++n; - page_base = 0; - } - - if (type == rpcrdma_readch || type == rpcrdma_writech) - goto out; - - if (xdrbuf->tail[0].iov_len) - rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n); - -out: - if (unlikely(n > RPCRDMA_MAX_SEGS)) - return -EIO; - return n; + return (cur->xc_flags & 
(XC_HEAD_DONE | XC_PAGES_DONE | + XC_TAIL_DONE)) == + (XC_HEAD_DONE | XC_PAGES_DONE | XC_TAIL_DONE); } static int @@ -292,11 +255,10 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, return 0; } -static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt, - struct rpcrdma_req *req, - struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, - struct rpcrdma_mr **mr) +static int rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct rpcrdma_xdr_cursor *cur, + bool writing, struct rpcrdma_mr **mr) { *mr = rpcrdma_mr_pop(&req->rl_free_mrs); if (!*mr) { @@ -307,13 +269,13 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt, } rpcrdma_mr_push(*mr, &req->rl_registered); - return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr); + return frwr_map(r_xprt, cur, writing, req->rl_slot.rq_xid, *mr); out_getmr_err: trace_xprtrdma_nomrs_err(r_xprt, req); xprt_wait_for_buffer_space(&r_xprt->rx_xprt); rpcrdma_mrs_refresh(r_xprt); - return ERR_PTR(-EAGAIN); + return -EAGAIN; } /* Register and XDR encode the Read list. 
Supports encoding a list of read @@ -336,10 +298,10 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, enum rpcrdma_chunktype rtype) { struct xdr_stream *xdr = &req->rl_stream; - struct rpcrdma_mr_seg *seg; + struct rpcrdma_xdr_cursor cur; struct rpcrdma_mr *mr; unsigned int pos; - int nsegs; + int ret; if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped) goto done; @@ -347,24 +309,20 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, pos = rqst->rq_snd_buf.head[0].iov_len; if (rtype == rpcrdma_areadch) pos = 0; - seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, - rtype, seg); - if (nsegs < 0) - return nsegs; + rpcrdma_xdr_cursor_init(&cur, &rqst->rq_snd_buf, pos, rtype); do { - seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr); - if (IS_ERR(seg)) - return PTR_ERR(seg); + ret = rpcrdma_mr_prepare(r_xprt, req, &cur, false, &mr); + if (ret) + return ret; if (encode_read_segment(xdr, mr, pos) < 0) return -EMSGSIZE; - trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs); + trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, + rpcrdma_xdr_cursor_done(&cur)); r_xprt->rx_stats.read_chunk_count++; - nsegs -= mr->mr_nents; - } while (nsegs); + } while (!rpcrdma_xdr_cursor_done(&cur)); done: if (xdr_stream_encode_item_absent(xdr) < 0) @@ -394,20 +352,16 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_ep *ep = r_xprt->rx_ep; - struct rpcrdma_mr_seg *seg; + struct rpcrdma_xdr_cursor cur; struct rpcrdma_mr *mr; - int nsegs, nchunks; + int nchunks, ret; __be32 *segcount; if (wtype != rpcrdma_writech) goto done; - seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, - rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg); - if (nsegs < 0) - return nsegs; + rpcrdma_xdr_cursor_init(&cur, &rqst->rq_rcv_buf, + rqst->rq_rcv_buf.head[0].iov_len, wtype); if 
(xdr_stream_encode_item_present(xdr) < 0) return -EMSGSIZE; @@ -418,30 +372,30 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, nchunks = 0; do { - seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr); - if (IS_ERR(seg)) - return PTR_ERR(seg); + ret = rpcrdma_mr_prepare(r_xprt, req, &cur, true, &mr); + if (ret) + return ret; if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs); + trace_xprtrdma_chunk_write(rqst->rq_task, mr, + rpcrdma_xdr_cursor_done(&cur)); r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; - nsegs -= mr->mr_nents; - } while (nsegs); + } while (!rpcrdma_xdr_cursor_done(&cur)); if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) { if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0) return -EMSGSIZE; trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr, - nsegs); + true); r_xprt->rx_stats.write_chunk_count++; - r_xprt->rx_stats.total_rdma_request += mr->mr_length; + r_xprt->rx_stats.total_rdma_request += + ep->re_write_pad_mr->mr_length; nchunks++; - nsegs -= mr->mr_nents; } /* Update count of segments in this Write chunk */ @@ -471,9 +425,9 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, enum rpcrdma_chunktype wtype) { struct xdr_stream *xdr = &req->rl_stream; - struct rpcrdma_mr_seg *seg; + struct rpcrdma_xdr_cursor cur; struct rpcrdma_mr *mr; - int nsegs, nchunks; + int nchunks, ret; __be32 *segcount; if (wtype != rpcrdma_replych) { @@ -482,10 +436,7 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, return 0; } - seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); - if (nsegs < 0) - return nsegs; + rpcrdma_xdr_cursor_init(&cur, &rqst->rq_rcv_buf, 0, wtype); if (xdr_stream_encode_item_present(xdr) < 0) return -EMSGSIZE; @@ -496,19 +447,19 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, nchunks = 0; do 
{ - seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr); - if (IS_ERR(seg)) - return PTR_ERR(seg); + ret = rpcrdma_mr_prepare(r_xprt, req, &cur, true, &mr); + if (ret) + return ret; if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs); + trace_xprtrdma_chunk_reply(rqst->rq_task, mr, + rpcrdma_xdr_cursor_done(&cur)); r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; - nsegs -= mr->mr_nents; - } while (nsegs); + } while (!rpcrdma_xdr_cursor_done(&cur)); /* Update count of segments in the Reply chunk */ *segcount = cpu_to_be32(nchunks); @@ -1471,7 +1422,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_ep->re_max_requests) credits = r_xprt->rx_ep->re_max_requests; - rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1)); if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); @@ -1490,15 +1440,20 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* LocalInv completion will complete the RPC */ else kref_put(&req->rl_kref, rpcrdma_reply_done); - return; -out_badversion: - trace_xprtrdma_reply_vers_err(rep); - goto out; +out_post: + rpcrdma_post_recvs(r_xprt, + credits + (buf->rb_bc_srv_max_requests << 1)); + return; out_norqst: spin_unlock(&xprt->queue_lock); trace_xprtrdma_reply_rqst_err(rep); + rpcrdma_rep_put(buf, rep); + goto out_post; + +out_badversion: + trace_xprtrdma_reply_vers_err(rep); goto out; out_shortreply: diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e7e4a39ca6c6..f8a0638eb095 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -118,7 +118,8 @@ svc_rdma_next_recv_ctxt(struct list_head *list) static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { - int node = 
ibdev_to_node(rdma->sc_cm_id->device); + struct ib_device *device = rdma->sc_cm_id->device; + int node = ibdev_to_node(device); struct svc_rdma_recv_ctxt *ctxt; unsigned long pages; dma_addr_t addr; @@ -133,9 +134,9 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; - addr = ib_dma_map_single(rdma->sc_pd->device, buffer, - rdma->sc_max_req_size, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) + addr = ib_dma_map_single(device, buffer, rdma->sc_max_req_size, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(device, addr)) goto fail2; svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); @@ -167,7 +168,7 @@ fail0: static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { - ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr, + ib_dma_unmap_single(rdma->sc_cm_id->device, ctxt->rc_recv_sge.addr, ctxt->rc_recv_sge.length, DMA_FROM_DEVICE); kfree(ctxt->rc_recv_buf); kfree(ctxt); @@ -861,18 +862,12 @@ static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp, unsigned int i; /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing - * the rq_pages that were already allocated for this rqstp. + * the receive buffer pages already allocated for this rqstp. */ - release_pages(rqstp->rq_respages, ctxt->rc_page_count); + release_pages(rqstp->rq_pages, ctxt->rc_page_count); for (i = 0; i < ctxt->rc_page_count; i++) rqstp->rq_pages[i] = ctxt->rc_pages[i]; - /* Update @rqstp's result send buffer to start after the - * last page in the RDMA Read payload. - */ - rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - /* Prevent svc_rdma_recv_ctxt_put() from releasing the * pages in ctxt::rc_pages a second time. 
*/ @@ -931,10 +926,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_recv_ctxt *ctxt; int ret; - /* Prevent svc_xprt_release() from releasing pages in rq_pages - * when returning 0 or an error. + /* Precaution: a zero page count on error return causes + * svc_rqst_release_pages() to release nothing. */ - rqstp->rq_respages = rqstp->rq_pages; rqstp->rq_next_page = rqstp->rq_respages; rqstp->rq_xprt_ctxt = NULL; @@ -962,7 +956,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) return 0; percpu_counter_inc(&svcrdma_stat_recv); - ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device, + ib_dma_sync_single_for_cpu(rdma_xprt->sc_cm_id->device, ctxt->rc_recv_sge.addr, ctxt->rc_byte_len, DMA_FROM_DEVICE); svc_rdma_build_arg_xdr(rqstp, ctxt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 4ec2f9ae06aa..402e2ceca4ff 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -252,6 +252,28 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) } /** + * svc_rdma_write_chunk_release - Release Write chunk I/O resources + * @rdma: controlling transport + * @ctxt: Send context that is being released + * + * Write chunk resources remain live until Send completion because + * Write WRs are chained to the Send WR. This function releases all + * write_info structures accumulated on @ctxt->sc_write_info_list. 
+ */ +void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt) +{ + struct svc_rdma_write_info *info; + + while (!list_empty(&ctxt->sc_write_info_list)) { + info = list_first_entry(&ctxt->sc_write_info_list, + struct svc_rdma_write_info, wi_list); + list_del(&info->wi_list); + svc_rdma_write_info_free(info); + } +} + +/** * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources * @rdma: controlling transport * @ctxt: Send context that is being released @@ -307,13 +329,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svc_rdma_write_info *info = - container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: trace_svcrdma_wc_write(&cc->cc_cid); - break; + return; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); break; @@ -321,12 +341,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_write_err(wc, &cc->cc_cid); } - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); - - if (unlikely(wc->status != IB_WC_SUCCESS)) - svc_xprt_deferred_close(&rdma->sc_xprt); - - svc_rdma_write_info_free(info); + /* The RDMA Write has flushed, so the client won't get + * some of the outgoing RPC message. Signal the loss + * to the client by closing the connection. 
+ */ + svc_xprt_deferred_close(&rdma->sc_xprt); } /** @@ -405,34 +424,17 @@ static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma, cqe = NULL; } - do { - if (atomic_sub_return(cc->cc_sqecount, - &rdma->sc_sq_avail) > 0) { - cc->cc_posttime = ktime_get(); - ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); - if (ret) - break; - return 0; - } - - percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma, &cc->cc_cid); - atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); - wait_event(rdma->sc_send_wait, - atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); - trace_svcrdma_sq_retry(rdma, &cc->cc_cid); - } while (1); - - trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); - - /* If even one was posted, there will be a completion. */ - if (bad_wr != first_wr) - return 0; + ret = svc_rdma_sq_wait(rdma, &cc->cc_cid, cc->cc_sqecount); + if (ret < 0) + return ret; - atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); - wake_up(&rdma->sc_send_wait); - return -ENOTCONN; + cc->cc_posttime = ktime_get(); + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + if (ret) + return svc_rdma_post_send_err(rdma, &cc->cc_cid, bad_wr, + first_wr, cc->cc_sqecount, + ret); + return 0; } /* Build a bvec that covers one kvec in an xdr_buf. @@ -617,9 +619,37 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) return xdr->len; } -static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr) +/* Link chunk WRs onto @sctxt's WR chain. Completion is requested + * for the tail WR, which is posted first. 
+ */ +static void svc_rdma_cc_link_wrs(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_chunk_ctxt *cc) +{ + struct ib_send_wr *first_wr; + struct list_head *pos; + struct ib_cqe *cqe; + + first_wr = sctxt->sc_wr_chain; + cqe = &cc->cc_cqe; + list_for_each(pos, &cc->cc_rwctxts) { + struct svc_rdma_rw_ctxt *rwc; + + rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); + first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, + rdma->sc_port_num, cqe, first_wr); + cqe = NULL; + } + sctxt->sc_wr_chain = first_wr; + sctxt->sc_sqecount += cc->cc_sqecount; +} + +/* Link Write WRs for @chunk onto @sctxt's WR chain. + */ +static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk_ctxt *cc; @@ -639,10 +669,14 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, if (ret != payload.len) goto out_err; - trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); - ret = svc_rdma_post_chunk_ctxt(rdma, cc); - if (ret < 0) + ret = -EINVAL; + if (unlikely(sctxt->sc_sqecount + cc->cc_sqecount > rdma->sc_sq_depth)) goto out_err; + + svc_rdma_cc_link_wrs(rdma, sctxt, cc); + list_add(&info->wi_list, &sctxt->sc_write_info_list); + + trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); return 0; out_err: @@ -651,17 +685,19 @@ out_err: } /** - * svc_rdma_send_write_list - Send all chunks on the Write list + * svc_rdma_prepare_write_list - Construct WR chain for sending Write list * @rdma: controlling RDMA transport * @rctxt: Write list provisioned by the client + * @sctxt: Send WR resources * @xdr: xdr_buf containing an RPC Reply message * - * Returns zero on success, or a negative errno if one or more - * Write chunks could not be sent. + * Returns zero on success, or a negative errno if WR chain + * construction fails for one or more Write chunks. 
*/ -int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr) +int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr) { struct svc_rdma_chunk *chunk; int ret; @@ -669,7 +705,7 @@ int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { if (!chunk->ch_payload_length) break; - ret = svc_rdma_send_write_chunk(rdma, chunk, xdr); + ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr); if (ret < 0) return ret; } @@ -699,9 +735,6 @@ int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, { struct svc_rdma_write_info *info = &sctxt->sc_reply_info; struct svc_rdma_chunk_ctxt *cc = &info->wi_cc; - struct ib_send_wr *first_wr; - struct list_head *pos; - struct ib_cqe *cqe; int ret; info->wi_rdma = rdma; @@ -715,23 +748,222 @@ int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, if (ret < 0) return ret; - first_wr = sctxt->sc_wr_chain; - cqe = &cc->cc_cqe; - list_for_each(pos, &cc->cc_rwctxts) { - struct svc_rdma_rw_ctxt *rwc; - - rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); - first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, cqe, first_wr); - cqe = NULL; - } - sctxt->sc_wr_chain = first_wr; - sctxt->sc_sqecount += cc->cc_sqecount; + svc_rdma_cc_link_wrs(rdma, sctxt, cc); trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount); return xdr->len; } +/* + * Cap contiguous RDMA Read sink allocations at order-4. + * Higher orders risk allocation failure under + * __GFP_NORETRY, which would negate the benefit of the + * contiguous fast path. 
+ */ +#define SVC_RDMA_CONTIG_MAX_ORDER 4 + +/** + * svc_rdma_alloc_read_pages - Allocate physically contiguous pages + * @nr_pages: number of pages needed + * @order: on success, set to the allocation order + * + * Attempts a higher-order allocation, falling back to smaller orders. + * The returned pages are split immediately so each sub-page has its + * own refcount and can be freed independently. + * + * Returns a pointer to the first page on success, or NULL if even + * order-1 allocation fails. + */ +static struct page * +svc_rdma_alloc_read_pages(unsigned int nr_pages, unsigned int *order) +{ + unsigned int o; + struct page *page; + + o = min(get_order(nr_pages << PAGE_SHIFT), + SVC_RDMA_CONTIG_MAX_ORDER); + + while (o >= 1) { + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN, + o); + if (page) { + split_page(page, o); + *order = o; + return page; + } + o--; + } + return NULL; +} + +/* + * svc_rdma_fill_contig_bvec - Replace rq_pages with a contiguous allocation + * @rqstp: RPC transaction context + * @head: context for ongoing I/O + * @bv: bvec entry to fill + * @pages_left: number of data pages remaining in the segment + * @len_left: bytes remaining in the segment + * + * On success, fills @bv with a bvec spanning the contiguous range and + * advances rc_curpage/rc_page_count. Returns the byte length covered, + * or zero if the allocation failed or would overrun rq_maxpages. 
+ */ +static unsigned int +svc_rdma_fill_contig_bvec(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, + struct bio_vec *bv, unsigned int pages_left, + unsigned int len_left) +{ + unsigned int order, npages, chunk_pages, chunk_len, i; + struct page *page; + + page = svc_rdma_alloc_read_pages(pages_left, &order); + if (!page) + return 0; + npages = 1 << order; + + if (head->rc_curpage + npages > rqstp->rq_maxpages) { + for (i = 0; i < npages; i++) + __free_page(page + i); + return 0; + } + + /* + * Replace rq_pages[] entries with pages from the contiguous + * allocation. If npages exceeds chunk_pages, the extra pages + * stay in rq_pages[] for later reuse or normal rqst teardown. + */ + for (i = 0; i < npages; i++) { + svc_rqst_page_release(rqstp, + rqstp->rq_pages[head->rc_curpage + i]); + rqstp->rq_pages[head->rc_curpage + i] = page + i; + } + + chunk_pages = min(npages, pages_left); + chunk_len = min_t(unsigned int, chunk_pages << PAGE_SHIFT, len_left); + bvec_set_page(bv, page, chunk_len, 0); + head->rc_page_count += chunk_pages; + head->rc_curpage += chunk_pages; + return chunk_len; +} + +/* + * svc_rdma_fill_page_bvec - Add a single rq_page to the bvec array + * @head: context for ongoing I/O + * @ctxt: R/W context whose bvec array is being filled + * @cur: page to add + * @bvec_idx: pointer to current bvec index, not advanced on merge + * @len_left: bytes remaining in the segment + * + * If @cur is physically contiguous with the preceding bvec, it is + * merged by extending that bvec's length. Otherwise a new bvec + * entry is created. Returns the byte length covered. 
+ */ +static unsigned int +svc_rdma_fill_page_bvec(struct svc_rdma_recv_ctxt *head, + struct svc_rdma_rw_ctxt *ctxt, struct page *cur, + unsigned int *bvec_idx, unsigned int len_left) +{ + unsigned int chunk_len = min_t(unsigned int, PAGE_SIZE, len_left); + + head->rc_page_count++; + head->rc_curpage++; + + if (*bvec_idx > 0) { + struct bio_vec *prev = &ctxt->rw_bvec[*bvec_idx - 1]; + + if (page_to_phys(prev->bv_page) + prev->bv_offset + + prev->bv_len == page_to_phys(cur)) { + prev->bv_len += chunk_len; + return chunk_len; + } + } + + bvec_set_page(&ctxt->rw_bvec[*bvec_idx], cur, chunk_len, 0); + (*bvec_idx)++; + return chunk_len; +} + +/** + * svc_rdma_build_read_segment_contig - Build RDMA Read WR with contiguous pages + * @rqstp: RPC transaction context + * @head: context for ongoing I/O + * @segment: co-ordinates of remote memory to be read + * + * Greedily allocates higher-order pages to cover the segment, + * building one bvec per contiguous chunk. Each allocation is + * split so sub-pages have independent refcounts. When a + * higher-order allocation fails, remaining pages are covered + * individually, merging adjacent pages into the preceding bvec + * when they are physically contiguous. The split sub-pages + * replace entries in rq_pages[] so downstream cleanup is + * unchanged. 
+ * + * Returns: + * %0: the Read WR was constructed successfully + * %-ENOMEM: allocation failed + * %-EIO: a DMA mapping error occurred + */ +static int svc_rdma_build_read_segment_contig(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, + const struct svc_rdma_segment *segment) +{ + struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp); + struct svc_rdma_chunk_ctxt *cc = &head->rc_cc; + unsigned int nr_data_pages, bvec_idx; + struct svc_rdma_rw_ctxt *ctxt; + unsigned int len_left; + int ret; + + nr_data_pages = PAGE_ALIGN(segment->rs_length) >> PAGE_SHIFT; + if (head->rc_curpage + nr_data_pages > rqstp->rq_maxpages) + return -ENOMEM; + + ctxt = svc_rdma_get_rw_ctxt(rdma, nr_data_pages); + if (!ctxt) + return -ENOMEM; + + bvec_idx = 0; + len_left = segment->rs_length; + while (len_left) { + unsigned int pages_left = PAGE_ALIGN(len_left) >> PAGE_SHIFT; + unsigned int chunk_len = 0; + + if (pages_left >= 2) + chunk_len = svc_rdma_fill_contig_bvec(rqstp, head, + &ctxt->rw_bvec[bvec_idx], + pages_left, len_left); + if (chunk_len) { + bvec_idx++; + } else { + struct page *cur = + rqstp->rq_pages[head->rc_curpage]; + chunk_len = svc_rdma_fill_page_bvec(head, ctxt, cur, + &bvec_idx, + len_left); + } + + len_left -= chunk_len; + } + + ctxt->rw_nents = bvec_idx; + + head->rc_pageoff = offset_in_page(segment->rs_length); + if (head->rc_pageoff) + head->rc_curpage--; + + ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset, + segment->rs_handle, segment->rs_length, + DMA_FROM_DEVICE); + if (ret < 0) + return -EIO; + percpu_counter_inc(&svcrdma_stat_read); + + list_add(&ctxt->rw_list, &cc->cc_rwctxts); + cc->cc_sqecount += ret; + return 0; +} + /** * svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment * @rqstp: RPC transaction context @@ -758,6 +990,14 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp, if (check_add_overflow(head->rc_pageoff, len, &total)) return -EINVAL; nr_bvec = PAGE_ALIGN(total) >> PAGE_SHIFT; + + if 
(head->rc_pageoff == 0 && nr_bvec >= 2) { + ret = svc_rdma_build_read_segment_contig(rqstp, head, + segment); + if (ret != -ENOMEM) + return ret; + } + ctxt = svc_rdma_get_rw_ctxt(rdma, nr_bvec); if (!ctxt) return -ENOMEM; @@ -1103,10 +1343,16 @@ static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp, { unsigned int i; + /* + * Move only pages containing RPC data into rc_pages[]. Pages + * from a contiguous allocation that were not used for the + * payload remain in rq_pages[] for subsequent reuse. + */ for (i = 0; i < head->rc_page_count; i++) { head->rc_pages[i] = rqstp->rq_pages[i]; rqstp->rq_pages[i] = NULL; } + rqstp->rq_pages_nfree = head->rc_page_count; } /** diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 914cd263c2f1..8b3f0c8c14b2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -116,7 +116,8 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { - int node = ibdev_to_node(rdma->sc_cm_id->device); + struct ib_device *device = rdma->sc_cm_id->device; + int node = ibdev_to_node(device); struct svc_rdma_send_ctxt *ctxt; unsigned long pages; dma_addr_t addr; @@ -136,9 +137,9 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail2; - addr = ib_dma_map_single(rdma->sc_pd->device, buffer, - rdma->sc_max_req_size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) + addr = ib_dma_map_single(device, buffer, rdma->sc_max_req_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, addr)) goto fail3; svc_rdma_send_cid_init(rdma, &ctxt->sc_cid); @@ -149,6 +150,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; + 
INIT_LIST_HEAD(&ctxt->sc_write_info_list); ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, rdma->sc_max_req_size); @@ -175,15 +177,14 @@ fail0: */ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma) { + struct ib_device *device = rdma->sc_cm_id->device; struct svc_rdma_send_ctxt *ctxt; struct llist_node *node; while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) { ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node); - ib_dma_unmap_single(rdma->sc_pd->device, - ctxt->sc_sges[0].addr, - rdma->sc_max_req_size, - DMA_TO_DEVICE); + ib_dma_unmap_single(device, ctxt->sc_sges[0].addr, + rdma->sc_max_req_size, DMA_TO_DEVICE); kfree(ctxt->sc_xprt_buf); kfree(ctxt->sc_pages); kfree(ctxt); @@ -237,6 +238,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; + svc_rdma_write_chunk_release(rdma, ctxt); svc_rdma_reply_chunk_release(rdma, ctxt); if (ctxt->sc_page_count) @@ -295,6 +297,117 @@ void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail) } /** + * svc_rdma_sq_wait - Wait for SQ slots using fair queuing + * @rdma: controlling transport + * @cid: completion ID for tracing + * @sqecount: number of SQ entries needed + * + * A ticket-based system ensures fair ordering when multiple threads + * wait for Send Queue capacity. Each waiter takes a ticket and is + * served in order, preventing starvation. + * + * Protocol invariant: every ticket holder must increment + * sc_sq_ticket_tail exactly once, whether the reservation + * succeeds or the connection closes. Failing to advance the + * tail stalls all subsequent waiters. + * + * The ticket counters are signed 32-bit atomics. After + * wrapping through INT_MAX, the equality check + * (tail == ticket) remains correct because both counters + * advance monotonically and the comparison uses exact + * equality rather than relational operators. 
+ * + * Return values: + * %0: SQ slots were reserved successfully + * %-ENOTCONN: The connection was lost + */ +int svc_rdma_sq_wait(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, int sqecount) +{ + int ticket; + + /* Fast path: try to reserve SQ slots without waiting. + * + * A failed reservation temporarily understates sc_sq_avail + * until the compensating atomic_add restores it. A Send + * completion arriving in that window sees a lower count + * than reality, but the value self-corrects once the add + * completes. No ordering guarantee is needed here because + * the slow path serializes all contended waiters. + */ + if (likely(atomic_sub_return(sqecount, &rdma->sc_sq_avail) >= 0)) + return 0; + atomic_add(sqecount, &rdma->sc_sq_avail); + + /* Slow path: take a ticket and wait in line */ + ticket = atomic_fetch_inc(&rdma->sc_sq_ticket_head); + + percpu_counter_inc(&svcrdma_stat_sq_starve); + trace_svcrdma_sq_full(rdma, cid); + + /* Wait until all earlier tickets have been served */ + wait_event(rdma->sc_sq_ticket_wait, + test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) || + atomic_read(&rdma->sc_sq_ticket_tail) == ticket); + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + goto out_close; + + /* It's our turn. Wait for enough SQ slots to be available. */ + while (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) { + atomic_add(sqecount, &rdma->sc_sq_avail); + + wait_event(rdma->sc_send_wait, + test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) || + atomic_read(&rdma->sc_sq_avail) >= sqecount); + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + goto out_close; + } + + /* Slots reserved successfully. Let the next waiter proceed. 
*/ + atomic_inc(&rdma->sc_sq_ticket_tail); + wake_up(&rdma->sc_sq_ticket_wait); + trace_svcrdma_sq_retry(rdma, cid); + return 0; + +out_close: + atomic_inc(&rdma->sc_sq_ticket_tail); + wake_up(&rdma->sc_sq_ticket_wait); + return -ENOTCONN; +} + +/** + * svc_rdma_post_send_err - Handle ib_post_send failure + * @rdma: controlling transport + * @cid: completion ID for tracing + * @bad_wr: first WR that was not posted + * @first_wr: first WR in the chain + * @sqecount: number of SQ entries that were reserved + * @ret: error code from ib_post_send + * + * Return values: + * %0: At least one WR was posted; a completion handles cleanup + * %-ENOTCONN: No WRs were posted; SQ slots are released + */ +int svc_rdma_post_send_err(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, + const struct ib_send_wr *bad_wr, + const struct ib_send_wr *first_wr, + int sqecount, int ret) +{ + trace_svcrdma_sq_post_err(rdma, cid, ret); + svc_xprt_deferred_close(&rdma->sc_xprt); + + /* If even one WR was posted, a Send completion will + * return the reserved SQ slots. + */ + if (bad_wr != first_wr) + return 0; + + svc_rdma_wake_send_waiters(rdma, sqecount); + return -ENOTCONN; +} + +/** * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC * @cq: Completion Queue context * @wc: Work Completion object @@ -336,11 +449,6 @@ flushed: * that these values remain available after the ib_post_send() call. * In some error flow cases, svc_rdma_wc_send() releases @ctxt. * - * Note there is potential for starvation when the Send Queue is - * full because there is no order to when waiting threads are - * awoken. The transport is typically provisioned with a deep - * enough Send Queue that SQ exhaustion should be a rare event. 
- * * Return values: * %0: @ctxt's WR chain was posted successfully * %-ENOTCONN: The connection was lost @@ -357,47 +465,21 @@ int svc_rdma_post_send(struct svcxprt_rdma *rdma, might_sleep(); /* Sync the transport header buffer */ - ib_dma_sync_single_for_device(rdma->sc_pd->device, + ib_dma_sync_single_for_device(rdma->sc_cm_id->device, send_wr->sg_list[0].addr, send_wr->sg_list[0].length, DMA_TO_DEVICE); - /* If the SQ is full, wait until an SQ entry is available */ - while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) { - if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) { - svc_rdma_wake_send_waiters(rdma, sqecount); - - /* When the transport is torn down, assume - * ib_drain_sq() will trigger enough Send - * completions to wake us. The XPT_CLOSE test - * above should then cause the while loop to - * exit. - */ - percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma, &cid); - wait_event(rdma->sc_send_wait, - atomic_read(&rdma->sc_sq_avail) > 0); - trace_svcrdma_sq_retry(rdma, &cid); - continue; - } - - trace_svcrdma_post_send(ctxt); - ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); - if (ret) { - trace_svcrdma_sq_post_err(rdma, &cid, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); - - /* If even one WR was posted, there will be a - * Send completion that bumps sc_sq_avail. - */ - if (bad_wr == first_wr) { - svc_rdma_wake_send_waiters(rdma, sqecount); - break; - } - } - return 0; - } - return -ENOTCONN; + ret = svc_rdma_sq_wait(rdma, &cid, sqecount); + if (ret < 0) + return ret; + + trace_svcrdma_post_send(ctxt); + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + if (ret) + return svc_rdma_post_send_err(rdma, &cid, bad_wr, + first_wr, sqecount, ret); + return 0; } /** @@ -858,7 +940,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, /* The svc_rqst and all resources it owns are released as soon as * svc_rdma_sendto returns. 
Transfer pages under I/O to the ctxt - * so they are released by the Send completion handler. + * so they are released only after Send completion, and not by + * svc_rqst_release_pages(). */ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, struct svc_rdma_send_ctxt *ctxt) @@ -870,9 +953,6 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, ctxt->sc_pages[i] = rqstp->rq_respages[i]; rqstp->rq_respages[i] = NULL; } - - /* Prevent svc_xprt_release from releasing pages in rq_pages */ - rqstp->rq_next_page = rqstp->rq_respages; } /* Prepare the portion of the RPC Reply that will be transmitted @@ -976,6 +1056,12 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.num_sge = 1; sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; + + /* Ensure only the error message is posted, not any previously + * prepared Write chunk WRs. + */ + sctxt->sc_wr_chain = &sctxt->sc_send_wr; + sctxt->sc_sqecount = 1; if (svc_rdma_post_send(rdma, sctxt)) goto put_ctxt; return; @@ -1023,7 +1109,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!p) goto put_ctxt; - ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res); + ret = svc_rdma_prepare_write_list(rdma, rctxt, sctxt, &rqstp->rq_res); if (ret < 0) goto put_ctxt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9b623849723e..f18bc60d9f4f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -179,6 +179,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, init_llist_head(&cma_xprt->sc_recv_ctxts); init_llist_head(&cma_xprt->sc_rw_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); + init_waitqueue_head(&cma_xprt->sc_sq_ticket_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); @@ -414,7 +415,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; 
struct ib_device *dev; int ret = 0; - RPC_IFDEBUG(struct sockaddr *sap); listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); clear_bit(XPT_CONN, &xprt->xpt_flags); @@ -478,6 +478,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); + atomic_set(&newxprt->sc_sq_ticket_head, 0); + atomic_set(&newxprt->sc_sq_ticket_tail, 0); newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { @@ -560,18 +562,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - dprintk("svcrdma: new connection accepted on device %s:\n", dev->name); - sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; - dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); - sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); - dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); - dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); - dprintk(" rdma_rw_ctxs : %d\n", ctxts); - dprintk(" max_requests : %d\n", newxprt->sc_max_requests); - dprintk(" ord : %d\n", conn_param.initiator_depth); -#endif + if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) { + struct sockaddr *sap; + + dprintk("svcrdma: new connection accepted on device %s:\n", dev->name); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); + dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); + dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" rdma_rw_ctxs : %d\n", ctxts); + dprintk(" max_requests : %d\n", newxprt->sc_max_requests); + dprintk(" ord : %d\n", conn_param.initiator_depth); + 
} return &newxprt->sc_xprt; @@ -648,7 +652,8 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) * If there are already waiters on the SQ, * return false. */ - if (waitqueue_active(&rdma->sc_send_wait)) + if (waitqueue_active(&rdma->sc_send_wait) || + waitqueue_active(&rdma->sc_sq_ticket_wait)) return 0; /* Otherwise return true. */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9a8ce5df83ca..61706df5e485 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -510,8 +510,21 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - task->tk_status = -ENOMEM; - xprt_add_backlog(xprt, task); + task->tk_status = -EAGAIN; + xprt_add_backlog_noncongested(xprt, task); + /* A buffer freed between buffer_get and rpc_sleep_on + * goes back to the pool with no waiter to wake. + * Re-check after joining the backlog to close that gap. + */ + req = rpcrdma_buffer_get(&r_xprt->rx_buf); + if (req) { + struct rpc_rqst *rqst = &req->rl_slot; + + if (!xprt_wake_up_backlog(xprt, rqst)) { + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(&r_xprt->rx_buf, req); + } + } } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b51a162885bb..aecf9c0a153f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -708,6 +708,18 @@ out_emptyq: */ xprt_wait_for_buffer_space(&r_xprt->rx_xprt); r_xprt->rx_stats.empty_sendctx_q++; + + /* Recheck: a Send completion between the ring-empty test + * and the set_bit could cause its xprt_write_space() to + * miss, leaving XPRT_WRITE_SPACE set with a non-full ring. + * The smp_mb__after_atomic() pairs with smp_store_release() + * in rpcrdma_sendctx_put_locked(). 
+ */ + smp_mb__after_atomic(); + next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); + if (next_head != READ_ONCE(buf->rb_sc_tail)) + xprt_write_space(&r_xprt->rx_xprt); + return NULL; } @@ -739,7 +751,10 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, } while (buf->rb_sc_ctxs[next_tail] != sc); - /* Paired with READ_ONCE */ + /* Paired with READ_ONCE in rpcrdma_sendctx_get_locked(): + * both the fast-path ring-full test and the post-set_bit + * recheck in the slow path depend on this store-release. + */ smp_store_release(&buf->rb_sc_tail, next_tail); xprt_write_space(&r_xprt->rx_xprt); @@ -1359,7 +1374,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) if (likely(ep->re_receive_count > needed)) goto out; needed -= ep->re_receive_count; - needed += RPCRDMA_MAX_RECV_BATCH; + needed += ep->re_recv_batch; if (atomic_inc_return(&ep->re_receiving) > 1) goto out_dec; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8147d2b41494..f53a77472724 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -96,6 +96,7 @@ struct rpcrdma_ep { struct rpcrdma_notification re_rn; int re_receive_count; unsigned int re_max_requests; /* depends on device */ + unsigned int re_recv_batch; unsigned int re_inline_send; /* negotiated */ unsigned int re_inline_recv; /* negotiated */ @@ -283,19 +284,36 @@ struct rpcrdma_mr { * registered or invalidated. 
Must handle a Reply chunk: */ enum { - RPCRDMA_MAX_IOV_SEGS = 3, + RPCRDMA_MAX_IOV_SEGS = 3, /* head, page-boundary, tail */ RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + RPCRDMA_MAX_IOV_SEGS, }; -/* Arguments for DMA mapping and registration */ -struct rpcrdma_mr_seg { - u32 mr_len; /* length of segment */ - struct page *mr_page; /* underlying struct page */ - u64 mr_offset; /* IN: page offset, OUT: iova */ +/** + * struct rpcrdma_xdr_cursor - tracks position within an xdr_buf + * for iterative MR registration + * @xc_buf: the xdr_buf being iterated + * @xc_page_offset: byte offset into the page region consumed so far + * @xc_flags: combination of XC_* bits + * + * Each XC_*_DONE flag indicates that this region has no + * remaining MR registration work. That condition holds both when the region + * has already been registered by a prior frwr_map() call and + * when the region is excluded from this chunk type (pre-set + * at init time by rpcrdma_xdr_cursor_init()). frwr_map() + * treats the two cases identically: skip the region. + */ +struct rpcrdma_xdr_cursor { + const struct xdr_buf *xc_buf; + unsigned int xc_page_offset; + unsigned int xc_flags; }; +#define XC_HEAD_DONE BIT(0) +#define XC_PAGES_DONE BIT(1) +#define XC_TAIL_DONE BIT(2) + /* The Send SGE array is provisioned to send a maximum size * inline request: * - RPC-over-RDMA header @@ -330,7 +348,6 @@ struct rpcrdma_req { struct list_head rl_free_mrs; struct list_head rl_registered; - struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; static inline struct rpcrdma_req * @@ -450,8 +467,8 @@ rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) } /* Setting this to 0 ensures interoperability with early servers. - * Setting this to 1 enhances certain unaligned read/write performance. - * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ + * Setting this to 1 enhances unaligned read/write performance. 
+ * Default is 0, see sysctl entry and rpc_rdma.c */ extern int xprt_rdma_pad_optimize; /* This setting controls the hunt for a supported memory @@ -535,10 +552,10 @@ void frwr_reset(struct rpcrdma_req *req); int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device); int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr); void frwr_mr_release(struct rpcrdma_mr *mr); -struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, - struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, __be32 xid, - struct rpcrdma_mr *mr); +int frwr_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_xdr_cursor *cur, + bool writing, __be32 xid, + struct rpcrdma_mr *mr); int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 76284fc538eb..b0bba0feef56 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -177,8 +177,20 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == LAST_FRAGMENT) { TIPC_SKB_CB(head)->validated = 0; - if (unlikely(!tipc_msg_validate(&head))) + + /* If the reassembled skb has been freed in + * tipc_msg_validate() because of an invalid truesize, + * then head will point to a newly allocated reassembled + * skb, while *headbuf points to freed reassembled skb. + * In such cases, correct *headbuf for freeing the newly + * allocated reassembled skb later. 
+ */ + if (unlikely(!tipc_msg_validate(&head))) { + if (head != *headbuf) + *headbuf = head; goto err; + } + *buf = head; TIPC_SKB_CB(head)->tail = NULL; *headbuf = NULL; diff --git a/net/tls/tls.h b/net/tls/tls.h index e8f81a006520..12f44cb649c9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -188,6 +188,7 @@ int tls_strp_dev_init(void); void tls_strp_dev_exit(void); void tls_strp_done(struct tls_strparser *strp); +void __tls_strp_done(struct tls_strparser *strp); void tls_strp_stop(struct tls_strparser *strp); int tls_strp_init(struct tls_strparser *strp, struct sock *sk); void tls_strp_data_ready(struct tls_strparser *strp); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 98e12f0ff57e..c72e88317627 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -624,6 +624,12 @@ void tls_strp_done(struct tls_strparser *strp) WARN_ON(!strp->stopped); cancel_work_sync(&strp->work); + __tls_strp_done(strp); +} + +/* For setup error paths where the strparser was initialized but never armed. */ +void __tls_strp_done(struct tls_strparser *strp) +{ tls_strp_anchor_free(strp); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 94d2ae0daa8c..3bfdaf5e64f5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -789,23 +789,33 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_pl->sg.end; sk_msg_iter_var_prev(i); + /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap + * link (frags won't use it). 'i' is now the last filled entry: + * + * i end start + * v v v [ rsv ] + * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain] + * ^ END v + * `-----------------------------------------' + * + * Note that SGL does not allow chain-after-chain, so for TLS 1.3, + * we must make sure we don't create the wrap entry and then chain + * link to content_type immediately at index 0. 
+ */ + if (i < msg_pl->sg.start) + sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), + msg_pl->sg.data); + rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); - sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, - &rec->sg_content_type); + sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } - if (msg_pl->sg.end < msg_pl->sg.start) { - sg_chain(&msg_pl->sg.data[msg_pl->sg.start], - MAX_SKB_FRAGS - msg_pl->sg.start + 1, - msg_pl->sg.data); - } - i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); @@ -2317,9 +2327,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (copied < 0) goto splice_requeue; - if (chunk < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; + if (copied < rxm->full_len) { + rxm->offset += copied; + rxm->full_len -= copied; goto splice_requeue; } @@ -2624,8 +2634,12 @@ void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) void tls_sw_free_resources_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx; + + ctx = tls_sw_ctx_rx(tls_ctx); tls_sw_release_resources_rx(sk); + __tls_strp_done(&ctx->strp); tls_sw_free_ctx_rx(tls_ctx); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4c4a8d23ddd2..1cbf36ea043b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1968,16 +1968,19 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) static void unix_destruct_scm(struct sk_buff *skb) { - struct scm_cookie scm; + struct scm_cookie scm = {}; + + swap(scm.pid, UNIXCB(skb).pid); - memset(&scm, 0, sizeof(scm)); - scm.pid = UNIXCB(skb).pid; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); - /* Alas, it calls VFS */ - /* So fscking what? 
fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); +} + +static void unix_wfree(struct sk_buff *skb) +{ + unix_destruct_scm(skb); sock_wfree(skb); } @@ -1993,7 +1996,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); - skb->destructor = unix_destruct_scm; + skb->destructor = unix_wfree; return err; } @@ -2070,6 +2073,13 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) } } +static void unix_orphan_scm(struct sock *sk, struct sk_buff *skb) +{ + scm_stat_del(sk, skb); + unix_destruct_scm(skb); + skb->destructor = sock_wfree; +} + /* * Send AF_UNIX data. */ @@ -2683,10 +2693,16 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) int err; mutex_lock(&u->iolock); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); - mutex_unlock(&u->iolock); - if (!skb) + if (!skb) { + mutex_unlock(&u->iolock); return err; + } + + unix_orphan_scm(sk, skb); + + mutex_unlock(&u->iolock); return recv_actor(sk, skb); } @@ -2886,6 +2902,9 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) #endif spin_unlock(&queue->lock); + + unix_orphan_scm(sk, skb); + mutex_unlock(&u->iolock); return recv_actor(sk, skb); @@ -3304,6 +3323,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sk_buff *skb; int answ = 0; + if (sk->sk_type != SOCK_STREAM) + return -EOPNOTSUPP; + mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue); @@ -3735,15 +3757,15 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) struct bpf_prog *prog; struct sock *sk = v; uid_t uid; - bool slow; int ret; if (v == SEQ_START_TOKEN) return 0; - slow = lock_sock_fast(sk); + lock_sock(sk); + unix_state_lock(sk); - if (unlikely(sk_unhashed(sk))) { + if (unlikely(sock_flag(sk, SOCK_DEAD))) { ret = SEQ_SKIP; goto unlock; } @@ -3753,7 +3775,8 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void 
*v) prog = bpf_iter_get_info(&meta, false); ret = unix_prog_seq_show(prog, &meta, v, uid); unlock: - unlock_sock_fast(sk, slow); + unix_state_unlock(sk); + release_sock(sk); return ret; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a7967a345827..0783555e2526 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -607,6 +607,8 @@ static void unix_gc(struct work_struct *work) struct sk_buff_head hitlist; struct sk_buff *skb; + WRITE_ONCE(gc_in_progress, true); + spin_lock(&unix_gc_lock); if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { @@ -649,10 +651,8 @@ void unix_schedule_gc(struct user_struct *user) READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) return; - if (!READ_ONCE(gc_in_progress)) { - WRITE_ONCE(gc_in_progress, true); + if (!READ_ONCE(gc_in_progress)) queue_work(system_dfl_wq, &unix_gc_work); - } if (user && READ_ONCE(unix_graph_cyclic_sccs)) flush_work(&unix_gc_work); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index d14cd5454a8d..f86ff19e9764 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -185,6 +185,9 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r */ if (!psock->sk_pair) { sk_pair = unix_peer(sk); + if (unlikely(!sk_pair)) + return -EINVAL; + sock_hold(sk_pair); psock->sk_pair = sk_pair; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 2b7c0b5896ed..7a8963595bf9 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -375,10 +375,10 @@ static void hvs_open_connection(struct vmbus_channel *chan) } else { sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); - sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE); + sndbuf = VMBUS_RING_SIZE(sndbuf); rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); - rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); + rcvbuf = VMBUS_RING_SIZE(rcvbuf); } 
chan->max_pkt_size = HVS_MAX_PKT_SIZE; @@ -694,7 +694,6 @@ out: static s64 hvs_stream_has_data(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; - bool need_refill; s64 ret; if (hvs->recv_data_len > 0) @@ -702,9 +701,31 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) switch (hvs_channel_readable_payload(hvs->chan)) { case 1: - need_refill = !hvs->recv_desc; - if (!need_refill) - return -EIO; + if (hvs->recv_desc) { + /* Here hvs->recv_data_len is 0, so hvs->recv_desc must + * be NULL unless it points to the 0-byte-payload FIN + * packet or a malformed/short packet: see + * hvs_update_recv_data(). + * + * If hvs->recv_desc points to the FIN packet, here all + * the payload has been dequeued and the peer_shutdown + * flag is set, but hvs_channel_readable_payload() still + * returns 1, because the VMBus ringbuffer's read_index + * is not updated for the FIN packet: + * hvs_stream_dequeue() -> hv_pkt_iter_next() updates + * the cached priv_read_index but has no opportunity to + * update the read_index in hv_pkt_iter_close() as + * hvs_stream_has_data() returns 0 for the FIN packet, + * so it won't get dequeued. + * + * In case hvs->recv_desc points to a malformed/short + * packet, return -EIO. 
+ */ + if (!(vsk->peer_shutdown & SEND_SHUTDOWN)) + return -EIO; + + return 0; + } hvs->recv_desc = hv_pkt_iter_first(hvs->chan); if (!hvs->recv_desc) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a152a9e208d0..989cc252d3d3 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -73,6 +73,7 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, struct sk_buff *skb, struct msghdr *msg, + size_t pkt_len, bool zerocopy) { struct ubuf_info *uarg; @@ -81,12 +82,10 @@ static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, uarg = msg->msg_ubuf; net_zcopy_get(uarg); } else { - struct iov_iter *iter = &msg->msg_iter; struct ubuf_info_msgzc *uarg_zc; uarg = msg_zerocopy_realloc(sk_vsock(vsk), - iter->count, - NULL, false); + pkt_len, NULL, false); if (!uarg) return -1; @@ -137,27 +136,6 @@ static void virtio_transport_init_hdr(struct sk_buff *skb, hdr->fwd_cnt = cpu_to_le32(0); } -static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, - void *dst, - size_t len) -{ - struct iov_iter iov_iter = { 0 }; - struct kvec kvec; - size_t to_copy; - - kvec.iov_base = dst; - kvec.iov_len = len; - - iov_iter.iter_type = ITER_KVEC; - iov_iter.kvec = &kvec; - iov_iter.nr_segs = 1; - - to_copy = min_t(size_t, len, skb->len); - - skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, - &iov_iter, to_copy); -} - /* Packet capture */ static struct sk_buff *virtio_transport_build_skb(void *opaque) { @@ -167,12 +145,12 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct sk_buff *skb; size_t payload_len; - /* A packet could be split to fit the RX buffer, so we can retrieve - * the payload length from the header and the buffer pointer taking - * care of the offset in the original packet. 
+ /* A packet could be split to fit the RX buffer, so we use + * the payload length from the header, which has been updated + * by the sender to reflect the fragment size. */ pkt_hdr = virtio_vsock_hdr(pkt); - payload_len = pkt->len; + payload_len = le32_to_cpu(pkt_hdr->len); skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, GFP_ATOMIC); @@ -215,12 +193,18 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); if (payload_len) { - if (skb_is_nonlinear(pkt)) { - void *data = skb_put(skb, payload_len); - - virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); - } else { - skb_put_data(skb, pkt->data, payload_len); + struct iov_iter iov_iter; + struct kvec kvec; + void *data = skb_put(skb, payload_len); + + kvec.iov_base = data; + kvec.iov_len = payload_len; + iov_iter_kvec(&iov_iter, ITER_DEST, &kvec, 1, payload_len); + + if (skb_copy_datagram_iter(pkt, VIRTIO_VSOCK_SKB_CB(pkt)->offset, + &iov_iter, payload_len)) { + kfree_skb(skb); + return NULL; } } @@ -398,11 +382,17 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, * each iteration. If this is last skb for this buffer * and MSG_ZEROCOPY mode is in use - we must allocate * completion for the current syscall. + * + * Pass pkt_len because msg iter is already consumed + * by virtio_transport_fill_skb(), so iter->count + * can not be used for RLIMIT_MEMLOCK pinned-pages + * accounting done by msg_zerocopy_realloc(). 
*/ if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { if (virtio_transport_init_zcopy_skb(vsk, skb, info->msg, + pkt_len, can_zcopy)) { kfree_skb(skb); ret = -ENOMEM; @@ -442,7 +432,9 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->buf_used + len > vvs->buf_alloc) + u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + + if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; @@ -545,9 +537,8 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, skb_queue_walk(&vvs->rx_queue, skb) { size_t bytes; - bytes = len - total; - if (bytes > skb->len) - bytes = skb->len; + bytes = min_t(size_t, len - total, + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); spin_unlock_bh(&vvs->rx_lock); @@ -1558,8 +1549,6 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, return -ENOMEM; } - sk_acceptq_added(sk); - lock_sock_nested(child, SINGLE_DEPTH_NESTING); child->sk_state = TCP_ESTABLISHED; @@ -1581,6 +1570,7 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, return ret; } + sk_acceptq_added(sk); if (virtio_transport_space_update(child, skb)) child->sk_write_space(child); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f334cdef8958..7db9cd433801 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1276,6 +1276,18 @@ static int nl80211_prepare_wdev_dump(struct netlink_callback *cb, rtnl_unlock(); return -ENODEV; } + + /* + * The first invocation validated the wdev's netns against + * the caller via __cfg80211_wdev_from_attrs(). The wiphy + * may have moved netns between dumpit invocations (via + * NL80211_CMD_SET_WIPHY_NETNS), so re-check here. 
+ */ + if (!net_eq(wiphy_net(wiphy), sock_net(cb->skb->sk))) { + rtnl_unlock(); + return -ENODEV; + } + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -13867,6 +13879,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(net)) return PTR_ERR(net); + /* + * The caller already has CAP_NET_ADMIN over the source netns + * (enforced by GENL_UNS_ADMIN_PERM on the genl op). Mirror the + * convention used by net/core/rtnetlink.c::rtnl_get_net_ns_capable() + * and require CAP_NET_ADMIN over the target netns as well, so that + * a caller that is privileged in their own user namespace cannot + * push a wiphy into a netns where they have no privilege. + */ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return -EPERM; + } + err = 0; /* check if anything to do */ @@ -19828,6 +19853,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -19835,6 +19861,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 4c8ea0583f94..d6cd0de64d1f 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) out->ftm.ftms_per_burst = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); if (capa->ftm.max_ftms_per_burst && (out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst || diff --git 
a/net/xdp/xsk.c b/net/xdp/xsk.c index 887abed25466..5e5786cd9af5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) { - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + struct xsk_addrs *xsk_addr; + + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (unlikely(!xsk_addr)) + return NULL; + + xsk_addr->addrs[0] = addr; + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + return xsk_addr; +} + +static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); + if (likely(xsk_addr)) + xsk_addr->num_descs = 1; + return xsk_addr; +} + +static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + return 0; + } + + if (unlikely(!__xsk_addrs_alloc(skb, addr))) + return -ENOMEM; + return 0; } static void xsk_inc_num_desc(struct sk_buff *skb) @@ -685,7 +718,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; for (i = 0; i < num_descs; i++) { @@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, - u64 addr) +static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, + u64 
addr) { + int err; + + err = xsk_skb_destructor_set_addr(skb, addr); + if (unlikely(err)) + return err; + skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; - xsk_skb_destructor_set_addr(skb, addr); + return 0; } static void xsk_consume_skb(struct sk_buff *skb) @@ -740,7 +779,7 @@ static void xsk_consume_skb(struct sk_buff *skb) u32 num_descs = xsk_get_num_desc(skb); struct xsk_addrs *xsk_addr; - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } @@ -819,28 +858,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); - - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return ERR_PTR(err); + } } } else { struct xsk_addrs *xsk_addr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - } + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb @@ -856,8 +886,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, addr = buffer - pool->addrs; for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { - if (unlikely(i >= MAX_SKB_FRAGS)) + if (unlikely(i >= MAX_SKB_FRAGS)) { + if (!xs->skb) + kfree_skb(skb); return ERR_PTR(-EOVERFLOW); + } page = pool->umem->pgs[addr >> 
PAGE_SHIFT]; get_page(page); @@ -914,7 +947,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (unlikely(err)) goto free_err; - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, xs->pool, hr); @@ -927,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct page *page; u8 *vaddr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) { - err = -ENOMEM; - goto free_err; - } - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; } if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { @@ -964,18 +987,28 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } + if (!xs->skb) { + err = xsk_skb_init_misc(skb, xs, desc->addr); + if (unlikely(err)) + goto free_err; + } xsk_inc_num_desc(skb); return skb; free_err: - if (skb && !skb_shinfo(skb)->nr_frags) + if (skb && !xs->skb) kfree_skb(skb); if (err == -EOVERFLOW) { - /* Drop the packet */ - xsk_inc_num_desc(xs->skb); - xsk_drop_skb(xs->skb); + if (xs->skb) { + /* Drop the packet */ + xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + } else { + xsk_cq_cancel_locked(xs->pool, 1); + xs->tx->invalid_descs++; + } xskq_cons_release(xs->tx); } else { /* Let application retry */ diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index cd7bc50872f6..d981cfdd8535 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (force_zc && force_copy) return -EINVAL; + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) + return -EOPNOTSUPP; + if (xsk_get_pool_from_qid(netdev, 
queue_id)) return -EBUSY; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index afa457506274..3bff346308d0 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, } xs = (struct xdp_sock *)sock->sk; + if (!READ_ONCE(xs->rx)) { + sockfd_put(sock); + return -ENOBUFS; + } map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a9652b422f51..cc35c2fcbbe0 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) @@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int 
dsfield; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1748d374abca..686014d39429 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del_rcu(&x->bydst); - hlist_del_rcu(&x->bysrc); - if (x->km.seq) - hlist_del_rcu(&x->byseq); + hlist_del_init_rcu(&x->bydst); + hlist_del_init_rcu(&x->bysrc); + if (!hlist_unhashed(&x->byseq)) + hlist_del_init_rcu(&x->byseq); if (!hlist_unhashed(&x->state_cache)) hlist_del_rcu(&x->state_cache); if (!hlist_unhashed(&x->state_cache_input)) hlist_del_rcu(&x->state_cache_input); - if (x->id.spi) - hlist_del_rcu(&x->byspi); + if (!hlist_unhashed(&x->byspi)) + hlist_del_init_rcu(&x->byspi); net->xfrm.state_num--; xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d56450f61669..38a90e5ee3d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; |
