From 28232a4317be7ad615f0f1b69dc8583fd580a8e3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 20 May 2017 14:12:34 +0200 Subject: KVM: arm/arm64: Fix isues with GICv2 on GICv3 migration We have been a little loose with our intermediate VMCR representation where we had a 'ctlr' field, but we failed to differentiate between the GICv2 GICC_CTLR and ICC_CTLR_EL1 layouts, and therefore ended up mapping the wrong bits into the individual fields of the ICH_VMCR_EL2 when emulating a GICv2 on a GICv3 system. Fix this by using explicit fields for the VMCR bits instead. Cc: Eric Auger Reported-by: wanghaibin Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 4 ++++ include/linux/irqchip/arm-gic.h | 28 +++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index fffb91202bc9..1fa293a37f4a 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,10 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) #define ICH_VMCR_CBPR_SHIFT 4 #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) #define ICH_VMCR_EOIM_SHIFT 9 diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index dc30f3d057eb..d3453ee072fc 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -25,7 +25,18 @@ #define GICC_ENABLE 0x1 #define GICC_INT_PRI_THRESHOLD 0xf0 -#define GIC_CPU_CTRL_EOImodeNS (1 << 9) +#define GIC_CPU_CTRL_EnableGrp0_SHIFT 0 +#define GIC_CPU_CTRL_EnableGrp0 (1 << GIC_CPU_CTRL_EnableGrp0_SHIFT) +#define GIC_CPU_CTRL_EnableGrp1_SHIFT 1 +#define GIC_CPU_CTRL_EnableGrp1 (1 << GIC_CPU_CTRL_EnableGrp1_SHIFT) +#define GIC_CPU_CTRL_AckCtl_SHIFT 2 +#define GIC_CPU_CTRL_AckCtl (1 << GIC_CPU_CTRL_AckCtl_SHIFT) +#define GIC_CPU_CTRL_FIQEn_SHIFT 3 +#define GIC_CPU_CTRL_FIQEn (1 << GIC_CPU_CTRL_FIQEn_SHIFT) +#define GIC_CPU_CTRL_CBPR_SHIFT 4 +#define GIC_CPU_CTRL_CBPR (1 << GIC_CPU_CTRL_CBPR_SHIFT) +#define GIC_CPU_CTRL_EOImodeNS_SHIFT 9 +#define GIC_CPU_CTRL_EOImodeNS (1 << GIC_CPU_CTRL_EOImodeNS_SHIFT) #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 @@ -84,8 +95,19 @@ #define GICH_LR_EOI (1 << 19) #define GICH_LR_HW (1 << 31) -#define GICH_VMCR_CTRL_SHIFT 0 -#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) +#define GICH_VMCR_ENABLE_GRP0_SHIFT 0 +#define GICH_VMCR_ENABLE_GRP0_MASK (1 << GICH_VMCR_ENABLE_GRP0_SHIFT) +#define GICH_VMCR_ENABLE_GRP1_SHIFT 1 +#define GICH_VMCR_ENABLE_GRP1_MASK (1 << GICH_VMCR_ENABLE_GRP1_SHIFT) +#define GICH_VMCR_ACK_CTL_SHIFT 2 +#define GICH_VMCR_ACK_CTL_MASK (1 << GICH_VMCR_ACK_CTL_SHIFT) +#define GICH_VMCR_FIQ_EN_SHIFT 3 +#define GICH_VMCR_FIQ_EN_MASK (1 << GICH_VMCR_FIQ_EN_SHIFT) +#define GICH_VMCR_CBPR_SHIFT 4 +#define GICH_VMCR_CBPR_MASK (1 << GICH_VMCR_CBPR_SHIFT) +#define GICH_VMCR_EOI_MODE_SHIFT 9 +#define GICH_VMCR_EOI_MODE_MASK (1 << GICH_VMCR_EOI_MODE_SHIFT) + #define GICH_VMCR_PRIMASK_SHIFT 27 #define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) #define GICH_VMCR_BINPOINT_SHIFT 21 -- cgit v1.2.3 From b8cb5a545c3dd8b975aad19ea020eabe0a888e8d Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Wed, 24 May 2017 18:24:07 -0400 Subject: ext4: fix quota charging for shared xattr blocks ext4_xattr_block_set() calls dquot_alloc_block() to charge for an xattr block when new references are made. However if dquot_initialize() hasn't been called on an inode, request for charging is effectively ignored because ext4_inode_info->i_dquot is not initialized yet. Add dquot_initialize() to call paths that lead to ext4_xattr_block_set(). Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/acl.c | 4 ++++ fs/ext4/super.c | 3 +++ fs/ext4/xattr.c | 8 ++++++++ fs/quota/dquot.c | 16 ++++++++++++++++ include/linux/quotaops.h | 6 ++++++ 5 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index fd389935ecd1..3ec0e46de95f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -4,6 +4,7 @@ * Copyright (C) 2001-2003 Andreas Gruenbacher, */ +#include #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, retries = 0; + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, ext4_jbd2_credits_xattr(inode)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f05c72bc4ec..d37c81f327e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1174,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, return res; } + res = dquot_initialize(inode); + if (res) + return res; retry: handle = ext4_journal_start(inode, EXT4_HT_MISC, ext4_jbd2_credits_xattr(inode)); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8fb7ce14e6eb..5d3c2536641c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -888,6 +888,8 @@ inserted: else { u32 ref; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + /* The old block is released after updating the inode. */ error = dquot_alloc_block(inode, @@ -954,6 +956,8 @@ inserted: /* We need to allocate a new block */ ext4_fsblk_t goal, block; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); @@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, return -EINVAL; if (strlen(name) > 255) return -ERANGE; + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); @@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, int error, retries = 0; int credits = ext4_jbd2_credits_xattr(inode); + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ebf80c7739e1..48813aeaab80 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode) } EXPORT_SYMBOL(dquot_initialize); +bool dquot_initialize_needed(struct inode *inode) +{ + struct dquot **dquots; + int i; + + if (!dquot_active(inode)) + return false; + + dquots = i_dquot(inode); + for (i = 0; i < MAXQUOTAS; i++) + if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) + return true; + return false; +} +EXPORT_SYMBOL(dquot_initialize_needed); + /* * Release all quotas referenced by inode. * diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 9c6f768b7d32..dda22f45fc1b 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -44,6 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number); void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); int dquot_initialize(struct inode *inode); +bool dquot_initialize_needed(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); static inline struct dquot *dqgrab(struct dquot *dquot) @@ -207,6 +208,11 @@ static inline int dquot_initialize(struct inode *inode) return 0; } +static inline bool dquot_initialize_needed(struct inode *inode) +{ + return false; +} + static inline void dquot_drop(struct inode *inode) { } -- cgit v1.2.3 From cf124db566e6b036b8bcbe8decbed740bdfac8c6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 8 May 2017 12:52:56 -0400 Subject: net: Fix inconsistent teardown and release of private netdev state. Network devices can allocate reasources and private memory using netdev_ops->ndo_init(). However, the release of these resources can occur in one of two different places. Either netdev_ops->ndo_uninit() or netdev->destructor(). The decision of which operation frees the resources depends upon whether it is necessary for all netdev refs to be released before it is safe to perform the freeing. netdev_ops->ndo_uninit() presumably can occur right after the NETDEV_UNREGISTER notifier completes and the unicast and multicast address lists are flushed. netdev->destructor(), on the other hand, does not run until the netdev references all go away. Further complicating the situation is that netdev->destructor() almost universally does also a free_netdev(). This creates a problem for the logic in register_netdevice(). Because all callers of register_netdevice() manage the freeing of the netdev, and invoke free_netdev(dev) if register_netdevice() fails. If netdev_ops->ndo_init() succeeds, but something else fails inside of register_netdevice(), it does call ndo_ops->ndo_uninit(). But it is not able to invoke netdev->destructor(). This is because netdev->destructor() will do a free_netdev() and then the caller of register_netdevice() will do the same. However, this means that the resources that would normally be released by netdev->destructor() will not be. Over the years drivers have added local hacks to deal with this, by invoking their destructor parts by hand when register_netdevice() fails. Many drivers do not try to deal with this, and instead we have leaks. Let's close this hole by formalizing the distinction between what private things need to be freed up by netdev->destructor() and whether the driver needs unregister_netdevice() to perform the free_netdev(). netdev->priv_destructor() performs all actions to free up the private resources that used to be freed by netdev->destructor(), except for free_netdev(). netdev->needs_free_netdev is a boolean that indicates whether free_netdev() should be done at the end of unregister_netdevice(). Now, register_netdevice() can sanely release all resources after ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit() and netdev->priv_destructor(). And at the end of unregister_netdevice(), we invoke netdev->priv_destructor() and optionally call free_netdev(). Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- drivers/net/caif/caif_hsi.c | 2 +- drivers/net/caif/caif_serial.c | 2 +- drivers/net/caif/caif_spi.c | 2 +- drivers/net/caif/caif_virtio.c | 2 +- drivers/net/can/slcan.c | 7 +++---- drivers/net/can/vcan.c | 2 +- drivers/net/can/vxcan.c | 2 +- drivers/net/dummy.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/geneve.c | 2 +- drivers/net/gtp.c | 2 +- drivers/net/hamradio/6pack.c | 2 +- drivers/net/hamradio/bpqether.c | 2 +- drivers/net/ifb.c | 4 ++-- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/loopback.c | 4 ++-- drivers/net/macsec.c | 4 ++-- drivers/net/macvlan.c | 2 +- drivers/net/nlmon.c | 2 +- drivers/net/slip/slip.c | 7 +++---- drivers/net/team/team.c | 4 ++-- drivers/net/tun.c | 4 ++-- drivers/net/usb/cdc-phonet.c | 2 +- drivers/net/usb/qmi_wwan.c | 2 +- drivers/net/veth.c | 4 ++-- drivers/net/vrf.c | 2 +- drivers/net/vsockmon.c | 2 +- drivers/net/vxlan.c | 2 +- drivers/net/wan/dlci.c | 2 +- drivers/net/wan/hdlc_fr.c | 2 +- drivers/net/wan/lapbether.c | 2 +- drivers/net/wireless/ath/ath6kl/main.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 1 - drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 ++- drivers/net/wireless/intersil/hostap/hostap_main.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/marvell/mwifiex/main.c | 2 +- drivers/staging/rtl8188eu/os_dep/mon.c | 2 +- drivers/usb/gadget/function/f_phonet.c | 2 +- include/linux/netdevice.h | 7 ++++--- net/8021q/vlan_dev.c | 4 ++-- net/batman-adv/soft-interface.c | 5 ++--- net/bluetooth/6lowpan.c | 2 +- net/bridge/br_device.c | 2 +- net/caif/chnl_net.c | 4 ++-- net/core/dev.c | 8 ++++++-- net/hsr/hsr_device.c | 4 ++-- net/ieee802154/6lowpan/core.c | 2 +- net/ipv4/ip_tunnel.c | 4 ++-- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6_gre.c | 9 +++++---- net/ipv6/ip6_tunnel.c | 8 ++++---- net/ipv6/ip6_vti.c | 8 ++++---- net/ipv6/ip6mr.c | 2 +- net/ipv6/sit.c | 6 +++--- net/irda/irlan/irlan_eth.c | 2 +- net/l2tp/l2tp_eth.c | 2 +- net/mac80211/iface.c | 6 +++--- net/mac802154/iface.c | 7 +++---- net/openvswitch/vport-internal_dev.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- 62 files changed, 105 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2359478b977f..8ab6bdbe1682 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4192,7 +4192,6 @@ static void bond_destructor(struct net_device *bond_dev) struct bonding *bond = netdev_priv(bond_dev); if (bond->wq) destroy_workqueue(bond->wq); - free_netdev(bond_dev); } void bond_setup(struct net_device *bond_dev) @@ -4212,7 +4211,8 @@ void bond_setup(struct net_device *bond_dev) bond_dev->netdev_ops = &bond_netdev_ops; bond_dev->ethtool_ops = &bond_ethtool_ops; - bond_dev->destructor = bond_destructor; + bond_dev->needs_free_netdev = true; + bond_dev->priv_destructor = bond_destructor; SET_NETDEV_DEVTYPE(bond_dev, &bond_type); @@ -4736,7 +4736,7 @@ int bond_create(struct net *net, const char *name) rtnl_unlock(); if (res < 0) - bond_destructor(bond_dev); + free_netdev(bond_dev); return res; } diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index ddabce759456..71a7c3b44fdd 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1121,7 +1121,7 @@ static void cfhsi_setup(struct net_device *dev) dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; dev->priv_flags |= IFF_NO_QUEUE; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->netdev_ops = &cfhsi_netdevops; for (i = 0; i < CFHSI_PRIO_LAST; ++i) skb_queue_head_init(&cfhsi->qhead[i]); diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index c2dea4916e5d..76e1d3545105 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -428,7 +428,7 @@ static void caifdev_setup(struct net_device *dev) dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = CAIF_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; skb_queue_head_init(&serdev->head); serdev->common.link_select = CAIF_LINK_LOW_LATENCY; serdev->common.use_frag = true; diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index 3a529fbe539f..fc21afe852b9 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -712,7 +712,7 @@ static void cfspi_setup(struct net_device *dev) dev->flags = IFF_NOARP | IFF_POINTOPOINT; dev->priv_flags |= IFF_NO_QUEUE; dev->mtu = SPI_MAX_PAYLOAD_SIZE; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; skb_queue_head_init(&cfspi->qhead); skb_queue_head_init(&cfspi->chead); cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 6122768c8644..1794ea0420b7 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -617,7 +617,7 @@ static void cfv_netdev_setup(struct net_device *netdev) netdev->tx_queue_len = 100; netdev->flags = IFF_POINTOPOINT | IFF_NOARP; netdev->mtu = CFV_DEF_MTU_SIZE; - netdev->destructor = free_netdev; + netdev->needs_free_netdev = true; } /* Create debugfs counters for the device */ diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index eb7173713bbc..6a6e896e52fa 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -417,7 +417,7 @@ static int slc_open(struct net_device *dev) static void slc_free_netdev(struct net_device *dev) { int i = dev->base_addr; - free_netdev(dev); + slcan_devs[i] = NULL; } @@ -436,7 +436,8 @@ static const struct net_device_ops slc_netdev_ops = { static void slc_setup(struct net_device *dev) { dev->netdev_ops = &slc_netdev_ops; - dev->destructor = slc_free_netdev; + dev->needs_free_netdev = true; + dev->priv_destructor = slc_free_netdev; dev->hard_header_len = 0; dev->addr_len = 0; @@ -761,8 +762,6 @@ static void __exit slcan_exit(void) if (sl->tty) { printk(KERN_ERR "%s: tty discipline still running\n", dev->name); - /* Intentionally leak the control block. */ - dev->destructor = NULL; } unregister_netdev(dev); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index facca33d53e9..0eda1b308583 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -163,7 +163,7 @@ static void vcan_setup(struct net_device *dev) dev->flags |= IFF_ECHO; dev->netdev_ops = &vcan_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static struct rtnl_link_ops vcan_link_ops __read_mostly = { diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 7fbb24795681..30cf2368becf 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -156,7 +156,7 @@ static void vxcan_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->flags = (IFF_NOARP|IFF_ECHO); dev->netdev_ops = &vxcan_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } /* forward declaration for rtnl_create_link() */ diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 149244aac20a..9905b52fe293 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -328,7 +328,6 @@ static void dummy_free_netdev(struct net_device *dev) struct dummy_priv *priv = netdev_priv(dev); kfree(priv->vfinfo); - free_netdev(dev); } static void dummy_setup(struct net_device *dev) @@ -338,7 +337,8 @@ static void dummy_setup(struct net_device *dev) /* Initialize the device structure. */ dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; - dev->destructor = dummy_free_netdev; + dev->needs_free_netdev = true; + dev->priv_destructor = dummy_free_netdev; /* Fill in device structure with ethernet-generic values. */ dev->flags |= IFF_NOARP; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 77ed2f628f9c..ea1bfcf1870a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4525,7 +4525,7 @@ static void dummy_setup(struct net_device *dev) /* Initialize the device structure. */ dev->netdev_ops = &cxgb4_mgmt_netdev_ops; dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static int config_mgmt_dev(struct pci_dev *pdev) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6ebb0f559a42..199459bd6961 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1007,7 +1007,7 @@ static void geneve_setup(struct net_device *dev) dev->netdev_ops = &geneve_netdev_ops; dev->ethtool_ops = &geneve_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &geneve_type); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7b652bb7ebe4..ca110cd2a4e4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -611,7 +611,7 @@ static const struct net_device_ops gtp_netdev_ops = { static void gtp_link_setup(struct net_device *dev) { dev->netdev_ops = >p_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->hard_header_len = 0; dev->addr_len = 0; diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 922bf440e9f1..021a8ec411ab 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -311,7 +311,7 @@ static void sp_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->netdev_ops = &sp_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->mtu = SIXP_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->header_ops = &ax25_header_ops; diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index f62e7f325cf9..78a6414c5fd9 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -476,7 +476,7 @@ static const struct net_device_ops bpq_netdev_ops = { static void bpq_setup(struct net_device *dev) { dev->netdev_ops = &bpq_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 312fce7302d3..144ea5ae8ab4 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -207,7 +207,6 @@ static void ifb_dev_free(struct net_device *dev) __skb_queue_purge(&txp->tq); } kfree(dp->tx_private); - free_netdev(dev); } static void ifb_setup(struct net_device *dev) @@ -230,7 +229,8 @@ static void ifb_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); eth_hw_addr_random(dev); - dev->destructor = ifb_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ifb_dev_free; } static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 618ed88fad0f..7c7680c8f0e3 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -632,7 +632,7 @@ void ipvlan_link_setup(struct net_device *dev) dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; dev->netdev_ops = &ipvlan_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->header_ops = &ipvlan_header_ops; dev->ethtool_ops = &ipvlan_ethtool_ops; } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 224f65cb576b..30612497643c 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -159,7 +159,6 @@ static void loopback_dev_free(struct net_device *dev) { dev_net(dev)->loopback_dev = NULL; free_percpu(dev->lstats); - free_netdev(dev); } static const struct net_device_ops loopback_ops = { @@ -196,7 +195,8 @@ static void loopback_setup(struct net_device *dev) dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = ð_header_ops; dev->netdev_ops = &loopback_ops; - dev->destructor = loopback_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = loopback_dev_free; } /* Setup and register the loopback device. */ diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index cdc347be68f2..79411675f0e6 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2996,7 +2996,6 @@ static void macsec_free_netdev(struct net_device *dev) free_percpu(macsec->secy.tx_sc.stats); dev_put(real_dev); - free_netdev(dev); } static void macsec_setup(struct net_device *dev) @@ -3006,7 +3005,8 @@ static void macsec_setup(struct net_device *dev) dev->max_mtu = ETH_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; - dev->destructor = macsec_free_netdev; + dev->needs_free_netdev = true; + dev->priv_destructor = macsec_free_netdev; SET_NETDEV_DEVTYPE(dev, &macsec_type); eth_zero_addr(dev->broadcast); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 346ad2ff3998..67bf7ebae5c6 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1092,7 +1092,7 @@ void macvlan_common_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_UNICAST_FLT; dev->netdev_ops = &macvlan_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->header_ops = &macvlan_hard_header_ops; dev->ethtool_ops = &macvlan_ethtool_ops; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index b91603835d26..c4b3362da4a2 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -113,7 +113,7 @@ static void nlmon_setup(struct net_device *dev) dev->netdev_ops = &nlmon_ops; dev->ethtool_ops = &nlmon_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 1da31dc47f86..74b907206aa7 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -629,7 +629,7 @@ static void sl_uninit(struct net_device *dev) static void sl_free_netdev(struct net_device *dev) { int i = dev->base_addr; - free_netdev(dev); + slip_devs[i] = NULL; } @@ -651,7 +651,8 @@ static const struct net_device_ops sl_netdev_ops = { static void sl_setup(struct net_device *dev) { dev->netdev_ops = &sl_netdev_ops; - dev->destructor = sl_free_netdev; + dev->needs_free_netdev = true; + dev->priv_destructor = sl_free_netdev; dev->hard_header_len = 0; dev->addr_len = 0; @@ -1369,8 +1370,6 @@ static void __exit slip_exit(void) if (sl->tty) { printk(KERN_ERR "%s: tty discipline still running\n", dev->name); - /* Intentionally leak the control block. */ - dev->destructor = NULL; } unregister_netdev(dev); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6c5d5ef46f75..fba8c136aa7c 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1643,7 +1643,6 @@ static void team_destructor(struct net_device *dev) struct team *team = netdev_priv(dev); free_percpu(team->pcpu_stats); - free_netdev(dev); } static int team_open(struct net_device *dev) @@ -2079,7 +2078,8 @@ static void team_setup(struct net_device *dev) dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; - dev->destructor = team_destructor; + dev->needs_free_netdev = true; + dev->priv_destructor = team_destructor; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_TEAM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bbd707b9ef7a..9ee7d4275640 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1560,7 +1560,6 @@ static void tun_free_netdev(struct net_device *dev) free_percpu(tun->pcpu_stats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); - free_netdev(dev); } static void tun_setup(struct net_device *dev) @@ -1571,7 +1570,8 @@ static void tun_setup(struct net_device *dev) tun->group = INVALID_GID; dev->ethtool_ops = &tun_ethtool_ops; - dev->destructor = tun_free_netdev; + dev->needs_free_netdev = true; + dev->priv_destructor = tun_free_netdev; /* We prefer our own queue length */ dev->tx_queue_len = TUN_READQ_SIZE; } diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index eb52de8205f0..c7a350bbaaa7 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -298,7 +298,7 @@ static void usbpn_setup(struct net_device *dev) dev->addr_len = 1; dev->tx_queue_len = 3; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } /* diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8f923a147fa9..949671ce4039 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -123,7 +123,7 @@ static void qmimux_setup(struct net_device *dev) dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->netdev_ops = &qmimux_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 38f0f03a29c8..0156fe8cac17 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -222,7 +222,6 @@ static int veth_dev_init(struct net_device *dev) static void veth_dev_free(struct net_device *dev) { free_percpu(dev->vstats); - free_netdev(dev); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -317,7 +316,8 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX); - dev->destructor = veth_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = veth_dev_free; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index db882493875c..d38f11d833fe 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1348,7 +1348,7 @@ static void vrf_setup(struct net_device *dev) dev->netdev_ops = &vrf_netdev_ops; dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. */ eth_hw_addr_random(dev); diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c index 7f0136f2dd9d..c28bdce14fd5 100644 --- a/drivers/net/vsockmon.c +++ b/drivers/net/vsockmon.c @@ -135,7 +135,7 @@ static void vsockmon_setup(struct net_device *dev) dev->netdev_ops = &vsockmon_ops; dev->ethtool_ops = &vsockmon_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a6b5052c1d36..5fa798a5c9a6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2611,7 +2611,7 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); - dev->destructor = free_netdev; + dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &vxlan_type); dev->features |= NETIF_F_LLTX; diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 65ee2a6f248c..a0d76f70c428 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -475,7 +475,7 @@ static void dlci_setup(struct net_device *dev) dev->flags = 0; dev->header_ops = &dlci_header_ops; dev->netdev_ops = &dlci_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dlp->receive = dlci_receive; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index eb915281197e..78596e42a3f3 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1106,7 +1106,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) return -EIO; } - dev->destructor = free_netdev; + dev->needs_free_netdev = true; *get_dev_p(pvc, type) = dev; if (!used) { state(hdlc)->dce_changed = 1; diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 9df9ed62beff..63f749078a1f 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -306,7 +306,7 @@ static const struct net_device_ops lapbeth_netdev_ops = { static void lapbeth_setup(struct net_device *dev) { dev->netdev_ops = &lapbeth_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->type = ARPHRD_X25; dev->hard_header_len = 3; dev->mtu = 1000; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index 91ee542de3d7..b90c77ef792e 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -1287,7 +1287,7 @@ void init_netdev(struct net_device *dev) struct ath6kl *ar = ath6kl_priv(dev); dev->netdev_ops = &ath6kl_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->watchdog_timeo = ATH6KL_TX_TIMEOUT; dev->needed_headroom = ETH_HLEN; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index cd1d6730eab7..617199c0e5a0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5225,7 +5225,6 @@ void brcmf_cfg80211_free_netdev(struct net_device *ndev) if (vif) brcmf_free_vif(vif); - free_netdev(ndev); } static bool brcmf_is_linkup(const struct brcmf_event_msg *e) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index a3d82368f1a9..511d190c6cca 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -624,7 +624,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx, if (!ndev) return ERR_PTR(-ENOMEM); - ndev->destructor = brcmf_cfg80211_free_netdev; + ndev->needs_free_netdev = true; + ndev->priv_destructor = brcmf_cfg80211_free_netdev; ifp = netdev_priv(ndev); ifp->ndev = ndev; /* store mapping ifidx to bsscfgidx */ diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 544fc09dcb62..1372b20f931e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -73,7 +73,7 @@ struct net_device * hostap_add_interface(struct local_info *local, dev->mem_end = mdev->mem_end; hostap_setup_dev(dev, local, type); - dev->destructor = free_netdev; + dev->needs_free_netdev = true; sprintf(dev->name, "%s%s", prefix, name); if (!rtnl_locked) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 002b25cff5b6..c854a557998b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2861,7 +2861,7 @@ static const struct net_device_ops hwsim_netdev_ops = { static void hwsim_mon_setup(struct net_device *dev) { dev->netdev_ops = &hwsim_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; ether_setup(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->type = ARPHRD_IEEE80211_RADIOTAP; diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index dd87b9ff64c3..39b6b5e3f6e0 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1280,7 +1280,7 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv, struct net_device *dev) { dev->netdev_ops = &mwifiex_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; /* Initialize private structure */ priv->current_key_index = 0; priv->media_connected = false; diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c index cfe37eb026d6..859d0d6051cd 100644 --- a/drivers/staging/rtl8188eu/os_dep/mon.c +++ b/drivers/staging/rtl8188eu/os_dep/mon.c @@ -152,7 +152,7 @@ static const struct net_device_ops mon_netdev_ops = { static void mon_setup(struct net_device *dev) { dev->netdev_ops = &mon_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; ether_setup(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->type = ARPHRD_IEEE80211; diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index b4058f0000e4..6a1ce6a55158 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev) dev->tx_queue_len = 1; dev->netdev_ops = &pn_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->header_ops = &phonet_header_ops; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3f39d27decf4..ab7ca3fdc495 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1596,8 +1596,8 @@ enum netdev_priv_flags { * @rtnl_link_state: This enum represents the phases of creating * a new link * - * @destructor: Called from unregister, - * can be used to call free_netdev + * @needs_free_netdev: Should unregister perform free_netdev? + * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside * @@ -1858,7 +1858,8 @@ struct net_device { RTNL_LINK_INITIALIZING, } rtnl_link_state:16; - void (*destructor)(struct net_device *dev); + bool needs_free_netdev; + void (*priv_destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL struct netpoll_info __rcu *npinfo; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 953b6728bd00..abc5f400fc71 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -813,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; - free_netdev(dev); } void vlan_setup(struct net_device *dev) @@ -826,7 +825,8 @@ void vlan_setup(struct net_device *dev) netif_keep_dst(dev); dev->netdev_ops = &vlan_netdev_ops; - dev->destructor = vlan_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; dev->min_mtu = 0; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b25789abf7b9..10f7edfb176e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1034,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev) * netdev and its private data (bat_priv) */ rcu_barrier(); - - free_netdev(dev); } /** @@ -1047,7 +1045,8 @@ static void batadv_softif_init_early(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; - dev->destructor = batadv_softif_free; + dev->needs_free_netdev = true; + dev->priv_destructor = batadv_softif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL; dev->priv_flags |= IFF_NO_QUEUE; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 608959989f8e..ab3b654b05cc 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -598,7 +598,7 @@ static void netdev_setup(struct net_device *dev) dev->netdev_ops = &netdev_ops; dev->header_ops = &header_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static struct device_type bt_type = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 430b53e7d941..f0f3447e8aa4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -379,7 +379,7 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 1816fc9f1ee7..fe3c53efb949 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev) { struct chnl_net *priv = netdev_priv(dev); caif_free_client(&priv->chnl); - free_netdev(dev); } static void ipcaif_net_setup(struct net_device *dev) { struct chnl_net *priv; dev->netdev_ops = &netdev_ops; - dev->destructor = chnl_net_destructor; + dev->needs_free_netdev = true; + dev->priv_destructor = chnl_net_destructor; dev->flags |= IFF_NOARP; dev->flags |= IFF_POINTOPOINT; dev->mtu = GPRS_PDP_MTU; diff --git a/net/core/dev.c b/net/core/dev.c index 84e1e86a4bce..4c15466305c3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7502,6 +7502,8 @@ out: err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + if (dev->priv_destructor) + dev->priv_destructor(dev); goto out; } EXPORT_SYMBOL(register_netdevice); @@ -7709,8 +7711,10 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); - if (dev->destructor) - dev->destructor(dev); + if (dev->priv_destructor) + dev->priv_destructor(dev); + if (dev->needs_free_netdev) + free_netdev(dev); /* Report a network device has been unregistered */ rtnl_lock(); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c73160fb11e7..0a0a392dc2bd 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) del_timer_sync(&hsr->announce_timer); synchronize_rcu(); - free_netdev(hsr_dev); } static const struct net_device_ops hsr_device_ops = { @@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &hsr_type); dev->priv_flags |= IFF_NO_QUEUE; - dev->destructor = hsr_dev_destroy; + dev->needs_free_netdev = true; + dev->priv_destructor = hsr_dev_destroy; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index d7efbf0dad20..0a866f332290 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev) ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; - ldev->destructor = free_netdev; + ldev->needs_free_netdev = true; ldev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b878ecbc0608..b436d0775631 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -967,7 +967,6 @@ static void ip_tunnel_dev_free(struct net_device *dev) gro_cells_destroy(&tunnel->gro_cells); dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) @@ -1155,7 +1154,8 @@ int ip_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; int err; - dev->destructor = ip_tunnel_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip_tunnel_dev_free; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 551de4d023a8..b4f9622ee9f5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev) dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0c5b4caa1949..64eea3962733 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -991,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev) dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static void ip6gre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ip6gre_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->type = ARPHRD_IP6GRE; @@ -1148,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - ip6gre_dev_free(ign->fb_tunnel_dev); + free_netdev(ign->fb_tunnel_dev); err_alloc_dev: return err; } @@ -1300,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &ip6gre_tap_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9b37f9747fc6..c3581973f5d7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static int ip6_tnl_create2(struct net_device *dev) @@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) return t; failed_free: - ip6_dev_free(dev); + free_netdev(dev); failed: return ERR_PTR(err); } @@ -1777,7 +1776,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; - dev->destructor = ip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->flags |= IFF_NOARP; @@ -2224,7 +2224,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) return 0; err_register: - ip6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d67ef56454b2..837ea1eefe7f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) static void vti6_dev_free(struct net_device *dev) { free_percpu(dev->tstats); - free_netdev(dev); } static int vti6_tnl_create2(struct net_device *dev) @@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p return t; failed_free: - vti6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; - dev->destructor = vti6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); @@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net) return 0; err_register: - vti6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 374997d26488..2ecb39b943b5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev) dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 61e5902f0687..2378503577b0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, return nt; failed_free: - ipip6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev) dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } #define SIT_FEATURES (NETIF_F_SG | \ @@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = ipip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 74d09f91709e..3be852808a9d 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &irlan_eth_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8b21af7321b9..f7c54ece3733 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -141,7 +141,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->features |= NETIF_F_LLTX; dev->netdev_ops = &l2tp_eth_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8fae1a72e6a7..915d7e1b4545 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1213,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { static void ieee80211_if_free(struct net_device *dev) { free_percpu(dev->tstats); - free_netdev(dev); } static void ieee80211_if_setup(struct net_device *dev) @@ -1221,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; - dev->destructor = ieee80211_if_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ieee80211_if_free; } static void ieee80211_if_setup_no_queue(struct net_device *dev) @@ -1905,7 +1905,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - ieee80211_if_free(ndev); + free_netdev(ndev); return ret; } } diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 06019dba4b10..bd88a9b80773 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev) struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); mac802154_llsec_destroy(&sdata->sec); - - free_netdev(dev); } static void ieee802154_if_setup(struct net_device *dev) @@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->dev_addr); sdata->dev->header_ops = &mac802154_header_ops; - sdata->dev->destructor = mac802154_wpan_free; + sdata->dev->needs_free_netdev = true; + sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; wpan_dev->promiscuous_mode = false; @@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, break; case NL802154_IFTYPE_MONITOR: - sdata->dev->destructor = free_netdev; + sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; wpan_dev->promiscuous_mode = true; break; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 89193a634da4..04a3128adcf0 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev) struct vport *vport = ovs_internal_dev_get_vport(dev); ovs_vport_free(vport); - free_netdev(dev); } static void @@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_PHONY_HEADROOM | IFF_NO_QUEUE; - netdev->destructor = internal_dev_destructor; + netdev->needs_free_netdev = true; + netdev->priv_destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 21c28b51be94..2c9337946e30 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev) dev->tx_queue_len = 10; dev->netdev_ops = &gprs_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } /* -- cgit v1.2.3 From 8397ed36b7c585f8d3e06c431f4137309124f78f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 7 Jun 2017 12:26:23 -0600 Subject: net: ipv6: Release route when device is unregistering Roopa reported attempts to delete a bond device that is referenced in a multipath route is hanging: $ ifdown bond2 # ifupdown2 command that deletes virtual devices unregister_netdevice: waiting for bond2 to become free. Usage count = 2 Steps to reproduce: echo 1 > /proc/sys/net/ipv6/conf/all/ignore_routes_with_linkdown ip link add dev bond12 type bond ip link add dev bond13 type bond ip addr add 2001:db8:2::0/64 dev bond12 ip addr add 2001:db8:3::0/64 dev bond13 ip route add 2001:db8:33::0/64 nexthop via 2001:db8:2::2 nexthop via 2001:db8:3::2 ip link del dev bond12 ip link del dev bond13 The root cause is the recent change to keep routes on a linkdown. Update the check to detect when the device is unregistering and release the route for that case. Fixes: a1a22c12060e4 ("net: ipv6: Keep nexthop of multipath route on admin down") Reported-by: Roopa Prabhu Signed-off-by: David Ahern Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ net/ipv6/route.c | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab7ca3fdc495..846193dfb0ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4262,6 +4262,11 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } +static inline bool netdev_unregistering(const struct net_device *dev) +{ + return dev->reg_state == NETREG_UNREGISTERING; +} + static inline const char *netdev_reg_state(const struct net_device *dev) { switch (dev->reg_state) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc61b0b5e64e..7cebd954d5bb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) if ((rt->dst.dev == dev || !dev) && rt != adn->net->ipv6.ip6_null_entry && (rt->rt6i_nsiblings == 0 || + (dev && netdev_unregistering(dev)) || !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return -1; -- cgit v1.2.3 From 0620fddb56dfaf0e1034eeb69d79c73b361debbf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:49:26 +0100 Subject: KEYS: sanitize key structs before freeing While a 'struct key' itself normally does not contain sensitive information, Documentation/security/keys.txt actually encourages this: "Having a payload is not required; and the payload can, in fact, just be a value stored in the struct key itself." In case someone has taken this advice, or will take this advice in the future, zero the key structure before freeing it. We might as well, and as a bonus this could make it a bit more difficult for an adversary to determine which keys have recently been in use. This is safe because the key_jar cache does not use a constructor. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/key.h | 1 - security/keys/gc.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 0c9b93b0d1f7..78e25aabedaf 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -173,7 +173,6 @@ struct key { #ifdef KEY_DEBUGGING unsigned magic; #define KEY_DEBUG_MAGIC 0x18273645u -#define KEY_DEBUG_MAGIC_X 0xf8e9dacbu #endif unsigned long flags; /* status flags (change with bitops) */ diff --git a/security/keys/gc.c b/security/keys/gc.c index 595becc6d0d2..87cb260e4890 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -158,9 +158,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kfree(key->description); -#ifdef KEY_DEBUGGING - key->magic = KEY_DEBUG_MAGIC_X; -#endif + memzero_explicit(key, sizeof(*key)); kmem_cache_free(key_jar, key); } } -- cgit v1.2.3 From 6d53cefb18e4646fb4bf62ccb6098fb3808486df Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Jun 2017 15:51:56 -0700 Subject: compiler, clang: properly override 'inline' for clang Commit abb2ea7dfd82 ("compiler, clang: suppress warning for unused static inline functions") just caused more warnings due to re-defining the 'inline' macro. So undef it before re-defining it, and also add the 'notrace' attribute like the gcc version that this is overriding does. Maybe this makes clang happier. Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ea9126006a69..d614c5ea1b5e 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -21,4 +21,5 @@ * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well. */ -#define inline inline __attribute__((unused)) +#undef inline +#define inline inline __attribute__((unused)) notrace -- cgit v1.2.3 From 19e72d3abb63cb16d021a4066ce1a18880509e99 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Feb 2017 17:28:50 -0800 Subject: configfs: Introduce config_item_get_unless_zero() Signed-off-by: Bart Van Assche [hch: minor style tweak] Signed-off-by: Christoph Hellwig --- fs/configfs/item.c | 8 ++++++++ include/linux/configfs.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 8b2a994042dd..a66f6624d899 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item) } EXPORT_SYMBOL(config_item_get); +struct config_item *config_item_get_unless_zero(struct config_item *item) +{ + if (item && kref_get_unless_zero(&item->ci_kref)) + return item; + return NULL; +} +EXPORT_SYMBOL(config_item_get_unless_zero); + static void config_item_cleanup(struct config_item *item) { struct config_item_type *t = item->ci_type; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2319b8c108e8..c96709049683 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type); -extern struct config_item * config_item_get(struct config_item *); +extern struct config_item *config_item_get(struct config_item *); +extern struct config_item *config_item_get_unless_zero(struct config_item *); extern void config_item_put(struct config_item *); struct config_item_type { -- cgit v1.2.3 From db46a0e1be7eac45d0bb1bdcd438b8d47c920451 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 14 Jun 2017 16:15:24 +0900 Subject: net: update undefined ->ndo_change_mtu() comment Update ->ndo_change_mtu() callback comment to remove text about returning error in case of undefined callback. This change makes the comment match the existing code behavior. Signed-off-by: Magnus Damm Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 846193dfb0ac..4ed952c17fc7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -914,8 +914,7 @@ struct xfrmdev_ops { * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); * Called when a user wants to change the Maximum Transfer Unit - * of a device. If not defined, any request to change MTU will - * will return an error. + * of a device. * * void (*ndo_tx_timeout)(struct net_device *dev); * Callback used when the transmitter has not made any progress -- cgit v1.2.3 From dc9edc44de6cd7cc8cc7f5b36c1adb221eda3207 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Jun 2017 13:27:50 -0600 Subject: block: Fix a blk_exit_rl() regression Avoid that the following complaint is reported: BUG: sleeping function called from invalid context at kernel/workqueue.c:2790 in_atomic(): 1, irqs_disabled(): 0, pid: 41, name: rcuop/3 1 lock held by rcuop/3/41: #0: (rcu_callback){......}, at: [] rcu_nocb_kthread+0x282/0x500 Call Trace: dump_stack+0x86/0xcf ___might_sleep+0x174/0x260 __might_sleep+0x4a/0x80 flush_work+0x7e/0x2e0 __cancel_work_timer+0x143/0x1c0 cancel_work_sync+0x10/0x20 blk_throtl_exit+0x25/0x60 blkcg_exit_queue+0x35/0x40 blk_release_queue+0x42/0x130 kobject_put+0xa9/0x190 This happens since we invoke callbacks that need to block from the queue release handler. Fix this by pushing the final release to a workqueue. Reported-by: Ross Zwisler Fixes: commit b425e5049258 ("block: Avoid that blk_exit_rl() triggers a use-after-free") Signed-off-by: Bart Van Assche Tested-by: Ross Zwisler Updated changelog Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 34 ++++++++++++++++++++++------------ include/linux/blkdev.h | 2 ++ 2 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 283da7fbe034..27aceab1cc31 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -777,24 +777,25 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) } /** - * blk_release_queue: - release a &struct request_queue when it is no longer needed - * @kobj: the kobj belonging to the request queue to be released + * __blk_release_queue - release a request queue when it is no longer needed + * @work: pointer to the release_work member of the request queue to be released * * Description: - * blk_release_queue is the pair to blk_init_queue() or - * blk_queue_make_request(). It should be called when a request queue is - * being released; typically when a block device is being de-registered. - * Currently, its primary task it to free all the &struct request - * structures that were allocated to the queue and the queue itself. + * blk_release_queue is the counterpart of blk_init_queue(). It should be + * called when a request queue is being released; typically when a block + * device is being de-registered. Its primary task it to free the queue + * itself. * - * Note: + * Notes: * The low level driver must have finished any outstanding requests first * via blk_cleanup_queue(). - **/ -static void blk_release_queue(struct kobject *kobj) + * + * Although blk_release_queue() may be called with preemption disabled, + * __blk_release_queue() may sleep. + */ +static void __blk_release_queue(struct work_struct *work) { - struct request_queue *q = - container_of(kobj, struct request_queue, kobj); + struct request_queue *q = container_of(work, typeof(*q), release_work); if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) blk_stat_remove_callback(q, q->poll_cb); @@ -834,6 +835,15 @@ static void blk_release_queue(struct kobject *kobj) call_rcu(&q->rcu_head, blk_free_queue_rcu); } +static void blk_release_queue(struct kobject *kobj) +{ + struct request_queue *q = + container_of(kobj, struct request_queue, kobj); + + INIT_WORK(&q->release_work, __blk_release_queue); + schedule_work(&q->release_work); +} + static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab92c4ea138b..b74a3edcb3da 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -586,6 +586,8 @@ struct request_queue { size_t cmd_size; void *rq_alloc_data; + + struct work_struct release_work; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From c926820085437a27b27e78996b2c7a5ad94e8055 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Jun 2017 13:46:00 +0200 Subject: firmware: dmi_scan: Make dmi_walk and dmi_walk_early return real error codes Currently they return -1 on error, which will confuse callers if they try to interpret it as a normal negative error code. Signed-off-by: Andy Lutomirski Signed-off-by: Darren Hart (VMware) Signed-off-by: Jean Delvare --- drivers/firmware/dmi_scan.c | 9 +++++---- include/linux/dmi.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 82ee042f075c..fc30249132f5 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -144,7 +144,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, buf = dmi_early_remap(dmi_base, orig_dmi_len); if (buf == NULL) - return -1; + return -ENOMEM; dmi_decode_table(buf, decode, NULL); @@ -1008,7 +1008,8 @@ EXPORT_SYMBOL(dmi_get_date); * @decode: Callback function * @private_data: Private data to be passed to the callback function * - * Returns -1 when the DMI table can't be reached, 0 on success. + * Returns 0 on success, -ENXIO if DMI is not selected or not present, + * or a different negative error code if DMI walking fails. */ int dmi_walk(void (*decode)(const struct dmi_header *, void *), void *private_data) @@ -1016,11 +1017,11 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *), u8 *buf; if (!dmi_available) - return -1; + return -ENXIO; buf = dmi_remap(dmi_base, dmi_len); if (buf == NULL) - return -1; + return -ENOMEM; dmi_decode_table(buf, decode, private_data); diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 5e9c74cf8894..9bbf21a516e4 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; } static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *), - void *private_data) { return -1; } + void *private_data) { return -ENXIO; } static inline bool dmi_match(enum dmi_field f, const char *str) { return false; } static inline void dmi_memdev_name(u16 handle, const char **bank, -- cgit v1.2.3 From 1be7107fbe18eed3e319a6c3e83c78254b693acb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 19 Jun 2017 04:03:24 -0700 Subject: mm: larger stack guard gap, between vmas Stack guard page is a useful feature to reduce a risk of stack smashing into a different mapping. We have been using a single page gap which is sufficient to prevent having stack adjacent to a different mapping. But this seems to be insufficient in the light of the stack usage in userspace. E.g. glibc uses as large as 64kB alloca() in many commonly used functions. Others use constructs liks gid_t buffer[NGROUPS_MAX] which is 256kB or stack strings with MAX_ARG_STRLEN. This will become especially dangerous for suid binaries and the default no limit for the stack size limit because those applications can be tricked to consume a large portion of the stack and a single glibc call could jump over the guard page. These attacks are not theoretical, unfortunatelly. Make those attacks less probable by increasing the stack guard gap to 1MB (on systems with 4k pages; but make it depend on the page size because systems with larger base pages might cap stack allocations in the PAGE_SIZE units) which should cover larger alloca() and VLA stack allocations. It is obviously not a full fix because the problem is somehow inherent, but it should reduce attack space a lot. One could argue that the gap size should be configurable from userspace, but that can be done later when somebody finds that the new 1MB is wrong for some special case applications. For now, add a kernel command line option (stack_guard_gap) to specify the stack gap size (in page units). Implementation wise, first delete all the old code for stack guard page: because although we could get away with accounting one extra page in a stack vma, accounting a larger gap can break userspace - case in point, a program run with "ulimit -S -v 20000" failed when the 1MB gap was counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK and strict non-overcommit mode. Instead of keeping gap inside the stack vma, maintain the stack guard gap as a gap between vmas: using vm_start_gap() in place of vm_start (or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few places which need to respect the gap - mainly arch_get_unmapped_area(), and and the vma tree's subtree_gap support for that. Original-patch-by: Oleg Nesterov Original-patch-by: Michal Hocko Signed-off-by: Hugh Dickins Acked-by: Michal Hocko Tested-by: Helge Deller # parisc Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 7 ++ arch/arc/mm/mmap.c | 2 +- arch/arm/mm/mmap.c | 4 +- arch/frv/mm/elf-fdpic.c | 2 +- arch/mips/mm/mmap.c | 2 +- arch/parisc/kernel/sys_parisc.c | 15 ++- arch/powerpc/mm/hugetlbpage-radix.c | 2 +- arch/powerpc/mm/mmap.c | 4 +- arch/powerpc/mm/slice.c | 2 +- arch/s390/mm/mmap.c | 4 +- arch/sh/mm/mmap.c | 4 +- arch/sparc/kernel/sys_sparc_64.c | 4 +- arch/sparc/mm/hugetlbpage.c | 2 +- arch/tile/mm/hugetlbpage.c | 2 +- arch/x86/kernel/sys_x86_64.c | 4 +- arch/x86/mm/hugetlbpage.c | 2 +- arch/xtensa/kernel/syscall.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/proc/task_mmu.c | 4 - include/linux/mm.h | 53 ++++----- mm/gup.c | 5 - mm/memory.c | 38 ------ mm/mmap.c | 149 ++++++++++++++---------- 23 files changed, 152 insertions(+), 163 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0f5c3b4347c6..7737ab5d04b2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3811,6 +3811,13 @@ expediting. Set to zero to disable automatic expediting. + stack_guard_gap= [MM] + override the default stack gap protection. The value + is in page units and it defines how many pages prior + to (for stacks growing down) resp. after (for stacks + growing up) the main stack are reserved for no other + mapping. Default value is 256 pages. + stacktrace [FTRACE] Enabled the stack tracer on boot up. diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 3e25e8d6486b..2e13683dfb24 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 2239fde10b80..f0701d8d24df 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index da82c25301e7..46aa289c5102 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi addr = PAGE_ALIGN(addr); vma = find_vma(current->mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto success; } diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 64dd8bdd92c3..28adeabe851f 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index e5288638a1d9..378a754ca186 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; unsigned long task_size = TASK_SIZE; int do_color_align, last_mmap; struct vm_unmapped_area_info info; @@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, else addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) goto found_addr; } @@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_color_align, last_mmap; @@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = COLOR_ALIGN(addr, last_mmap, pgoff); else addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) goto found_addr; } diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 6575b9aabef4..a12e86395025 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } /* diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 9dbd2a733d6b..0ee6be4f1ba4 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 966b9fccfa66..45f6740dd407 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, if ((mm->task_size - len) < addr) return 0; vma = find_vma(mm, addr); - return (!vma || (addr + len) <= vma->vm_start); + return (!vma || (addr + len) <= vm_start_gap(vma)); } static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index b017daed6887..b854b1da281a 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto check_asce_limit; } @@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto check_asce_limit; } diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index 08e7af0be4a7..6a1a1297baae 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index ef4520efc813..043544d0cda3 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 7c29d38e6b99..88855e383b34 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index cb10153b5c9f..03e5cc4e76e4 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (current->mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 207b8f2582c7..213ddf3e937d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (end - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 302f43fd9c28..adad702b39cd 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 06937928cb72..74afbf02d07e 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, /* At this point: (!vmm || addr < vmm->vm_end). */ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + if (!vmm || addr + len <= vm_start_gap(vmm)) return addr; addr = vmm->vm_end; if (flags & MAP_SHARED) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dde861387a40..d44f5456eb9b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0c8b33d99b1..520802da059c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (stack_guard_page_start(vma, start)) - start += PAGE_SIZE; end = vma->vm_end; - if (stack_guard_page_end(vma, end)) - end -= PAGE_SIZE; seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", diff --git a/include/linux/mm.h b/include/linux/mm.h index b892e95d4929..6f543a47fc92 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1393,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -/* Is the vma a continuation of the stack vma above it? */ -static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline bool vma_is_anonymous(struct vm_area_struct *vma) { return !vma->vm_ops; @@ -1414,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma); static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } #endif -static inline int stack_guard_page_start(struct vm_area_struct *vma, - unsigned long addr) -{ - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_growsdown(vma->vm_prev, addr); -} - -/* Is the vma a continuation of the stack vma below it? */ -static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); -} - -static inline int stack_guard_page_end(struct vm_area_struct *vma, - unsigned long addr) -{ - return (vma->vm_flags & VM_GROWSUP) && - (vma->vm_end == addr) && - !vma_growsup(vma->vm_next, addr); -} - int vma_is_stack_for_current(struct vm_area_struct *vma); extern unsigned long move_page_tables(struct vm_area_struct *vma, @@ -2222,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping, pgoff_t offset, unsigned long size); +extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); @@ -2250,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m return vma; } +static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +{ + unsigned long vm_start = vma->vm_start; + + if (vma->vm_flags & VM_GROWSDOWN) { + vm_start -= stack_guard_gap; + if (vm_start > vma->vm_start) + vm_start = 0; + } + return vm_start; +} + +static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +{ + unsigned long vm_end = vma->vm_end; + + if (vma->vm_flags & VM_GROWSUP) { + vm_end += stack_guard_gap; + if (vm_end < vma->vm_end) + vm_end = -PAGE_SIZE; + } + return vm_end; +} + static inline unsigned long vma_pages(struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; diff --git a/mm/gup.c b/mm/gup.c index b3c7214d710d..576c4df58882 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) return -ENOENT; - /* For mm_populate(), just skip the stack guard page. */ - if ((*flags & FOLL_POPULATE) && - (stack_guard_page_start(vma, address) || - stack_guard_page_end(vma, address + PAGE_SIZE))) - return -ENOENT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) diff --git a/mm/memory.c b/mm/memory.c index 2e65df1831d9..bb11c474857e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2854,40 +2854,6 @@ out_release: return ret; } -/* - * This is like a special single-page "expand_{down|up}wards()", - * except we must first make sure that 'address{-|+}PAGE_SIZE' - * doesn't hit another vma. - */ -static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { - struct vm_area_struct *prev = vma->vm_prev; - - /* - * Is there a mapping abutting this one below? - * - * That's only ok if it's the same stack mapping - * that has gotten split.. - */ - if (prev && prev->vm_end == address) - return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - - return expand_downwards(vma, address - PAGE_SIZE); - } - if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { - struct vm_area_struct *next = vma->vm_next; - - /* As VM_GROWSDOWN but s/below/above/ */ - if (next && next->vm_start == address + PAGE_SIZE) - return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; - - return expand_upwards(vma, address + PAGE_SIZE); - } - return 0; -} - /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf) if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; - /* Check if we need to add a guard page to the stack */ - if (check_stack_guard_page(vma, vmf->address) < 0) - return VM_FAULT_SIGSEGV; - /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created diff --git a/mm/mmap.c b/mm/mmap.c index f82741e199c0..8e07976d5e47 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) unsigned long retval; unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; + struct vm_area_struct *next; unsigned long min_brk; bool populate; LIST_HEAD(uf); @@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) } /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + next = find_vma(mm, oldbrk); + if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) goto out; /* Ok, looks good - let it rip. */ @@ -253,10 +255,22 @@ out: static long vma_compute_subtree_gap(struct vm_area_struct *vma) { - unsigned long max, subtree_gap; - max = vma->vm_start; - if (vma->vm_prev) - max -= vma->vm_prev->vm_end; + unsigned long max, prev_end, subtree_gap; + + /* + * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we + * allow two stack_guard_gaps between them here, and when choosing + * an unmapped area; whereas when expanding we only require one. + * That's a little inconsistent, but keeps the code here simpler. + */ + max = vm_start_gap(vma); + if (vma->vm_prev) { + prev_end = vm_end_gap(vma->vm_prev); + if (max > prev_end) + max -= prev_end; + else + max = 0; + } if (vma->vm_rb.rb_left) { subtree_gap = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb)->rb_subtree_gap; @@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm) anon_vma_unlock_read(anon_vma); } - highest_address = vma->vm_end; + highest_address = vm_end_gap(vma); vma = vma->vm_next; i++; } @@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_next) vma_gap_update(vma->vm_next); else - mm->highest_vm_end = vma->vm_end; + mm->highest_vm_end = vm_end_gap(vma); /* * vma->vm_prev wasn't known when we followed the rbtree to find the @@ -856,7 +870,7 @@ again: vma_gap_update(vma); if (end_changed) { if (!next) - mm->highest_vm_end = end; + mm->highest_vm_end = vm_end_gap(vma); else if (!adjust_next) vma_gap_update(next); } @@ -941,7 +955,7 @@ again: * mm->highest_vm_end doesn't need any update * in remove_next == 1 case. */ - VM_WARN_ON(mm->highest_vm_end != end); + VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma)); } } if (insert && file) @@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) while (true) { /* Visit left subtree if it looks promising */ - gap_end = vma->vm_start; + gap_end = vm_start_gap(vma); if (gap_end >= low_limit && vma->vm_rb.rb_left) { struct vm_area_struct *left = rb_entry(vma->vm_rb.rb_left, @@ -1798,7 +1812,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) } } - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; check_current: /* Check if current node has a suitable gap */ if (gap_start > high_limit) @@ -1825,8 +1839,8 @@ check_current: vma = rb_entry(rb_parent(prev), struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_left) { - gap_start = vma->vm_prev->vm_end; - gap_end = vma->vm_start; + gap_start = vm_end_gap(vma->vm_prev); + gap_end = vm_start_gap(vma); goto check_current; } } @@ -1890,7 +1904,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) while (true) { /* Visit right subtree if it looks promising */ - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; if (gap_start <= high_limit && vma->vm_rb.rb_right) { struct vm_area_struct *right = rb_entry(vma->vm_rb.rb_right, @@ -1903,7 +1917,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) check_current: /* Check if current node has a suitable gap */ - gap_end = vma->vm_start; + gap_end = vm_start_gap(vma); if (gap_end < low_limit) return -ENOMEM; if (gap_start <= high_limit && gap_end - gap_start >= length) @@ -1929,7 +1943,7 @@ check_current: struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_right) { gap_start = vma->vm_prev ? - vma->vm_prev->vm_end : 0; + vm_end_gap(vma->vm_prev) : 0; goto check_current; } } @@ -1967,7 +1981,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; if (len > TASK_SIZE - mmap_min_addr) @@ -1978,9 +1992,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -2003,7 +2018,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; @@ -2018,9 +2033,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -2155,21 +2171,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, * update accounting. This is shared with both the * grow-up and grow-down cases. */ -static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) +static int acct_stack_growth(struct vm_area_struct *vma, + unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; - unsigned long new_start, actual_size; + unsigned long new_start; /* address space limit tests */ if (!may_expand_vm(mm, vma->vm_flags, grow)) return -ENOMEM; /* Stack limit test */ - actual_size = size; - if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) - actual_size -= PAGE_SIZE; - if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ @@ -2207,17 +2221,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *next; + unsigned long gap_addr; int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; /* Guard against wrapping around to address 0. */ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); - else + address &= PAGE_MASK; + address += PAGE_SIZE; + if (!address) return -ENOMEM; + /* Enforce stack_guard_gap */ + gap_addr = address + stack_guard_gap; + if (gap_addr < address) + return -ENOMEM; + next = vma->vm_next; + if (next && next->vm_start < gap_addr) { + if (!(next->vm_flags & VM_GROWSUP)) + return -ENOMEM; + /* Check that both stack segments have the same anon_vma? */ + } + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; @@ -2261,7 +2288,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (vma->vm_next) vma_gap_update(vma->vm_next); else - mm->highest_vm_end = address; + mm->highest_vm_end = vm_end_gap(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); @@ -2282,6 +2309,8 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *prev; + unsigned long gap_addr; int error; address &= PAGE_MASK; @@ -2289,6 +2318,17 @@ int expand_downwards(struct vm_area_struct *vma, if (error) return error; + /* Enforce stack_guard_gap */ + gap_addr = address - stack_guard_gap; + if (gap_addr > address) + return -ENOMEM; + prev = vma->vm_prev; + if (prev && prev->vm_end > gap_addr) { + if (!(prev->vm_flags & VM_GROWSDOWN)) + return -ENOMEM; + /* Check that both stack segments have the same anon_vma? */ + } + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; @@ -2343,28 +2383,25 @@ int expand_downwards(struct vm_area_struct *vma, return error; } -/* - * Note how expand_stack() refuses to expand the stack all the way to - * abut the next virtual mapping, *unless* that mapping itself is also - * a stack mapping. We want to leave room for a guard page, after all - * (the guard page itself is not added here, that is done by the - * actual page faulting logic) - * - * This matches the behavior of the guard page logic (see mm/memory.c: - * check_stack_guard_page()), which only allows the guard page to be - * removed under these circumstances. - */ +/* enforced gap between the expanding stack and other mappings. */ +unsigned long stack_guard_gap = 256UL< Tested-by: Max Gurtovoy Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 58 +++++++++++++++++++++++++++++++++++++++----------- block/blk-mq-sched.h | 9 -------- block/blk-mq.c | 16 +++++++++++--- include/linux/blkdev.h | 2 ++ 4 files changed, 61 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692526ae..0ded5e846335 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } +/* + * Mark a hardware queue as needing a restart. For shared queues, maintain + * a count of how many hardware queues are marked for restart. + */ +static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); + } else + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); +} + +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return false; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); + } else + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + + if (blk_mq_hctx_has_pending(hctx)) { + blk_mq_run_hw_queue(hctx, true); + return true; + } + + return false; +} + struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, @@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { - clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) { - blk_mq_run_hw_queue(hctx, true); - return true; - } - } - return false; -} - /** * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list * @pos: loop cursor. @@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) unsigned int i, j; if (set->flags & BLK_MQ_F_TAG_SHARED) { + /* + * If this is 0, then we know that no hardware queues + * have RESTART marked. We're done. + */ + if (!atomic_read(&queue->shared_hctx_restart)) + return; + rcu_read_lock(); list_for_each_entry_rcu_rr(q, queue, &set->tag_list, tag_set_list) { diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index edafb5383b7b..5007edece51a 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) return false; } -/* - * Mark a hardware queue as needing a restart. - */ -static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq.c b/block/blk-mq.c index bb66c96850b1..958cedaff8b8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q, } } +/* + * Caller needs to ensure that we're either frozen/quiesced, or that + * the queue isn't live yet. + */ static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); hctx->flags |= BLK_MQ_F_TAG_SHARED; - else + } else { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + } } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) +static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b74a3edcb3da..1ddd36bd2173 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,6 +391,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + atomic_t shared_hctx_restart; + struct blk_queue_stats *stats; struct rq_wb *rq_wb; -- cgit v1.2.3