From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Tue, 21 Apr 2015 12:33:11 -0700 Subject: perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..a59385852233 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v1.2.3 From 03c57747a7020a28a200e7e920fb48ecdc9b0fb8 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Wed, 22 Apr 2015 11:14:37 +0100 Subject: mpls: Per-device MPLS state Add per-device MPLS state to supported interfaces. Use the presence of this state in mpls_route_add to determine that this is a supported interface. Use the presence of mpls_dev to drop packets that arrived on an unsupported interface - previously they were allowed through. Cc: "Eric W. Biederman" Signed-off-by: Robert Shearman Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ net/mpls/af_mpls.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-- net/mpls/internal.h | 3 +++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcbde799ec69..dae106a3a998 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,7 @@ struct phy_device; struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; +struct mpls_dev; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1627,6 +1628,9 @@ struct net_device { void *ax25_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; +#if IS_ENABLED(CONFIG_MPLS_ROUTING) + struct mpls_dev __rcu *mpls_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index db8a2ea6d4de..ad45017eed99 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } +static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->mpls_ptr); +} + static bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_route *rt; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; unsigned int mtu; @@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ + mdev = mpls_dev_get(dev); + if (!mdev) + goto drop; + if (skb->pkt_type != PACKET_HOST) goto drop; @@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!dev) goto errout; - /* For now just support ethernet devices */ + /* Ensure this is a supported device */ err = -EINVAL; - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + if (!mpls_dev_get(dev)) goto errout; err = -EINVAL; @@ -428,10 +438,27 @@ errout: return err; } +static struct mpls_dev *mpls_add_dev(struct net_device *dev) +{ + struct mpls_dev *mdev; + int err = -ENOMEM; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(err); + + rcu_assign_pointer(dev->mpls_ptr, mdev); + + return mdev; +} + static void mpls_ifdown(struct net_device *dev) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -443,14 +470,33 @@ static void mpls_ifdown(struct net_device *dev) continue; rt->rt_dev = NULL; } + + mdev = mpls_dev_get(dev); + if (!mdev) + return; + + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + + kfree(mdev); } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mpls_dev *mdev; switch(event) { + case NETDEV_REGISTER: + /* For now just support ethernet devices */ + if ((dev->type == ARPHRD_ETHER) || + (dev->type == ARPHRD_LOOPBACK)) { + mdev = mpls_add_dev(dev); + if (IS_ERR(mdev)) + return notifier_from_errno(PTR_ERR(mdev)); + } + break; + case NETDEV_UNREGISTER: mpls_ifdown(dev); break; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index fb6de92052c4..8090cb3099b4 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -22,6 +22,9 @@ struct mpls_entry_decoded { u8 bos; }; +struct mpls_dev { +}; + struct sk_buff; static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -- cgit v1.2.3 From 7e01b5acd88b3f3108d8c4ce44e3205d67437202 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 16 Apr 2015 14:47:33 +0200 Subject: kexec: allocate the kexec control page with KEXEC_CONTROL_MEMORY_GFP Introduce KEXEC_CONTROL_MEMORY_GFP to allow the architecture code to override the gfp flags of the allocation for the kexec control page. The loop in kimage_alloc_normal_control_pages allocates pages with GFP_KERNEL until a page is found that happens to have an address smaller than the KEXEC_CONTROL_MEMORY_LIMIT. On systems with a large memory size but a small KEXEC_CONTROL_MEMORY_LIMIT the loop will keep allocating memory until the oom killer steps in. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 3 +++ include/linux/kexec.h | 4 ++++ kernel/kexec.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 694bcd6bd927..2f924bc30e35 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -26,6 +26,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Allocate control page with GFP_DMA */ +#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA + /* Maximum address we can use for the crash control pages */ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e60a745ac198..e804306ef5e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -40,6 +40,10 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif +#ifndef KEXEC_CONTROL_MEMORY_GFP +#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#endif + #ifndef KEXEC_CONTROL_PAGE_SIZE #error KEXEC_CONTROL_PAGE_SIZE not defined #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 38c25b1f2fd5..7a36fdcca5bf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, do { unsigned long pfn, epfn, addr, eaddr; - pages = kimage_alloc_pages(GFP_KERNEL, order); + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; pfn = page_to_pfn(pages); -- cgit v1.2.3 From ec65aafb9e3f316ff9167289e288856a7d528773 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 12:06:30 +0200 Subject: netdev_alloc_pcpu_stats: use less common iterator variable With the CPU iteration variable called 'i', it's relatively easy to have variable shadowing which sparse will warn about. Avoid that by renaming the variable to __cpu which is less likely to be used in the surrounding context. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dae106a3a998..dbad4d728b4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2025,10 +2025,10 @@ struct pcpu_sw_netstats { ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ if (pcpu_stats) { \ - int i; \ - for_each_possible_cpu(i) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, i); \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ -- cgit v1.2.3 From 1d8dc3d3c8f1d8ee1da9d54c5d7c8694419ade42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 16:38:43 +0200 Subject: rhashtable: don't attempt to grow when at max_size The conversion of mac80211's station table to rhashtable had a bug that I found by accident in code review, that hadn't been found as rhashtable apparently managed to have a maximum hash chain length of one (!) in all our testing. In order to test the bug and verify the fix I set my rhashtable's max_size very low (4) in order to force getting hash collisions. At that point, rhashtable WARNed in rhashtable_insert_rehash() but didn't actually reject the hash table insertion. This caused it to lose insertions - my master list of stations would have 9 entries, but the rhashtable only had 5. This may warrant a deeper look, but that WARN_ON() just shouldn't happen. Fix this by not returning true from rht_grow_above_100() when the rhashtable's max_size has been reached - in this case the user is explicitly configuring it to be at most that big, so even if it's now above 100% it shouldn't attempt to resize. This fixes the "lost insertion" issue and consequently allows my code to display its error (and verify my fix for it.) Signed-off-by: Johannes Berg Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e23d242d1230..dbcbcc59aa92 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -282,7 +282,8 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { - return atomic_read(&ht->nelems) > tbl->size; + return atomic_read(&ht->nelems) > tbl->size && + (!ht->p.max_size || tbl->size < ht->p.max_size); } /* The bucket lock is selected based on the hash and protects mutations -- cgit v1.2.3 From 547c4b547e07dcc60874b6ef6252dd49ff74aec1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Apr 2015 12:35:47 +0200 Subject: netfilter: bridge: fix NULL deref in physin/out ifindex helpers Might not have an outdev yet. We'll oops when iface goes down while skbs are still nfqueue'd: RIP: 0010:[] [] dev_cmp+0x4f/0x80 nfqnl_rcv_dev_event+0xe2/0x150 notifier_call_chain+0x53/0xa0 Fixes: c737b7c4510026 ("netfilter: bridge: add helpers for fetching physin/outdev") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ab8f76dba668..f2fdb5a52070 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -39,12 +39,24 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) static inline int nf_bridge_get_physinif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0; } static inline struct net_device * -- cgit v1.2.3 From 1dcc73d7bb0429994c54d33b40c5fb82b741a791 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Apr 2015 18:20:04 +0100 Subject: irqchip: gic: Drop support for gic_arch_extn Now that the users of gic_arch_extn have been fixed, drop the "feature" for good. This leads to the removal of some now useless locking. Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic.c | 71 +---------------------------------------- include/linux/irqchip/arm-gic.h | 2 -- 2 files changed, 1 insertion(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index a6ce3476834e..dd989148e689 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -80,19 +80,6 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock); #define NR_GIC_CPU_IF 8 static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly; -/* - * Supported arch specific GIC irq extension. - * Default make them NULL. - */ -struct irq_chip gic_arch_extn = { - .irq_eoi = NULL, - .irq_mask = NULL, - .irq_unmask = NULL, - .irq_retrigger = NULL, - .irq_set_type = NULL, - .irq_set_wake = NULL, -}; - #ifndef MAX_GIC_NR #define MAX_GIC_NR 1 #endif @@ -165,34 +152,16 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) static void gic_mask_irq(struct irq_data *d) { - unsigned long flags; - - raw_spin_lock_irqsave(&irq_controller_lock, flags); gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); - if (gic_arch_extn.irq_mask) - gic_arch_extn.irq_mask(d); - raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } static void gic_unmask_irq(struct irq_data *d) { - unsigned long flags; - - raw_spin_lock_irqsave(&irq_controller_lock, flags); - if (gic_arch_extn.irq_unmask) - gic_arch_extn.irq_unmask(d); gic_poke_irq(d, GIC_DIST_ENABLE_SET); - raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } static void gic_eoi_irq(struct irq_data *d) { - if (gic_arch_extn.irq_eoi) { - raw_spin_lock(&irq_controller_lock); - gic_arch_extn.irq_eoi(d); - raw_spin_unlock(&irq_controller_lock); - } - writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); } @@ -249,8 +218,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type) { void __iomem *base = gic_dist_base(d); unsigned int gicirq = gic_irq(d); - unsigned long flags; - int ret; /* Interrupt configuration for SGIs can't be changed */ if (gicirq < 16) @@ -261,25 +228,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - raw_spin_lock_irqsave(&irq_controller_lock, flags); - - if (gic_arch_extn.irq_set_type) - gic_arch_extn.irq_set_type(d, type); - - ret = gic_configure_irq(gicirq, type, base, NULL); - - raw_spin_unlock_irqrestore(&irq_controller_lock, flags); - - return ret; -} - -static int gic_retrigger(struct irq_data *d) -{ - if (gic_arch_extn.irq_retrigger) - return gic_arch_extn.irq_retrigger(d); - - /* the genirq layer expects 0 if we can't retrigger in hardware */ - return 0; + return gic_configure_irq(gicirq, type, base, NULL); } #ifdef CONFIG_SMP @@ -310,21 +259,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, } #endif -#ifdef CONFIG_PM -static int gic_set_wake(struct irq_data *d, unsigned int on) -{ - int ret = -ENXIO; - - if (gic_arch_extn.irq_set_wake) - ret = gic_arch_extn.irq_set_wake(d, on); - - return ret; -} - -#else -#define gic_set_wake NULL -#endif - static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { u32 irqstat, irqnr; @@ -383,11 +317,9 @@ static struct irq_chip gic_chip = { .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, - .irq_retrigger = gic_retrigger, #ifdef CONFIG_SMP .irq_set_affinity = gic_set_affinity, #endif - .irq_set_wake = gic_set_wake, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, }; @@ -1053,7 +985,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, set_handle_irq(gic_handle_irq); } - gic_chip.flags |= gic_arch_extn.flags; gic_dist_init(gic); gic_cpu_init(gic); gic_pm_init(gic); diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 36ec4ae74634..9de976b4f9a7 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -95,8 +95,6 @@ struct device_node; -extern struct irq_chip gic_arch_extn; - void gic_set_irqchip_flags(unsigned long flags); void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); -- cgit v1.2.3 From 2ea2f62c8bda242433809c7f4e9eae1c52c40bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Apr 2015 16:05:01 -0700 Subject: net: fix crash in build_skb() When I added pfmemalloc support in build_skb(), I forgot netlink was using build_skb() with a vmalloc() area. In this patch I introduce __build_skb() for netlink use, and build_skb() is a wrapper handling both skb->head_frag and skb->pfmemalloc This means netlink no longer has to hack skb->head_frag [ 1567.700067] kernel BUG at arch/x86/mm/physaddr.c:26! [ 1567.700067] invalid opcode: 0000 [#1] PREEMPT SMP KASAN [ 1567.700067] Dumping ftrace buffer: [ 1567.700067] (ftrace buffer empty) [ 1567.700067] Modules linked in: [ 1567.700067] CPU: 9 PID: 16186 Comm: trinity-c182 Not tainted 4.0.0-next-20150424-sasha-00037-g4796e21 #2167 [ 1567.700067] task: ffff880127efb000 ti: ffff880246770000 task.ti: ffff880246770000 [ 1567.700067] RIP: __phys_addr (arch/x86/mm/physaddr.c:26 (discriminator 3)) [ 1567.700067] RSP: 0018:ffff8802467779d8 EFLAGS: 00010202 [ 1567.700067] RAX: 000041000ed8e000 RBX: ffffc9008ed8e000 RCX: 000000000000002c [ 1567.700067] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffffb3fd6049 [ 1567.700067] RBP: ffff8802467779f8 R08: 0000000000000019 R09: ffff8801d0168000 [ 1567.700067] R10: ffff8801d01680c7 R11: ffffed003a02d019 R12: ffffc9000ed8e000 [ 1567.700067] R13: 0000000000000f40 R14: 0000000000001180 R15: ffffc9000ed8e000 [ 1567.700067] FS: 00007f2a7da3f700(0000) GS:ffff8801d1000000(0000) knlGS:0000000000000000 [ 1567.700067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1567.700067] CR2: 0000000000738308 CR3: 000000022e329000 CR4: 00000000000007e0 [ 1567.700067] Stack: [ 1567.700067] ffffc9000ed8e000 ffff8801d0168000 ffffc9000ed8e000 ffff8801d0168000 [ 1567.700067] ffff880246777a28 ffffffffad7c0a21 0000000000001080 ffff880246777c08 [ 1567.700067] ffff88060d302e68 ffff880246777b58 ffff880246777b88 ffffffffad9a6821 [ 1567.700067] Call Trace: [ 1567.700067] build_skb (include/linux/mm.h:508 net/core/skbuff.c:316) [ 1567.700067] netlink_sendmsg (net/netlink/af_netlink.c:1633 net/netlink/af_netlink.c:2329) [ 1567.774369] ? sched_clock_cpu (kernel/sched/clock.c:311) [ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) [ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) [ 1567.774369] sock_sendmsg (net/socket.c:614 net/socket.c:623) [ 1567.774369] sock_write_iter (net/socket.c:823) [ 1567.774369] ? sock_sendmsg (net/socket.c:806) [ 1567.774369] __vfs_write (fs/read_write.c:479 fs/read_write.c:491) [ 1567.774369] ? get_lock_stats (kernel/locking/lockdep.c:249) [ 1567.774369] ? default_llseek (fs/read_write.c:487) [ 1567.774369] ? vtime_account_user (kernel/sched/cputime.c:701) [ 1567.774369] ? rw_verify_area (fs/read_write.c:406 (discriminator 4)) [ 1567.774369] vfs_write (fs/read_write.c:539) [ 1567.774369] SyS_write (fs/read_write.c:586 fs/read_write.c:577) [ 1567.774369] ? SyS_read (fs/read_write.c:577) [ 1567.774369] ? __this_cpu_preempt_check (lib/smp_processor_id.c:63) [ 1567.774369] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2594 kernel/locking/lockdep.c:2636) [ 1567.774369] ? trace_hardirqs_on_thunk (arch/x86/lib/thunk_64.S:42) [ 1567.774369] system_call_fastpath (arch/x86/kernel/entry_64.S:261) Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve") Signed-off-by: Eric Dumazet Reported-by: Sasha Levin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 31 ++++++++++++++++++++++--------- net/netlink/af_netlink.c | 6 ++---- 3 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 06793b598f44..66e374d62f64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -773,6 +773,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); +struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 456ead534e10..3cfff2a3d651 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -280,13 +280,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -311,11 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - if (frag_size) { - skb->head_frag = 1; - if (virt_to_head_page(data)->pfmemalloc) - skb->pfmemalloc = 1; - } atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -332,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19909d0786a2..ec4adbdcb9b4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, if (data == NULL) return NULL; - skb = build_skb(data, size); + skb = __build_skb(data, size); if (skb == NULL) vfree(data); - else { - skb->head_frag = 0; + else skb->destructor = netlink_skb_destructor; - } return skb; } -- cgit v1.2.3 From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition is only needed due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC. Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 1 - arch/x86/kernel/pvclock.c | 44 ------------------------------------------ arch/x86/vdso/vclock_gettime.c | 34 ++++++++++++++------------------ include/linux/sched.h | 8 -------- kernel/sched/core.c | 15 -------------- 5 files changed, 15 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. + * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..fe22f7510bce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); -- cgit v1.2.3 From ee136af4a064c2f61e2025873584d2c7ec93f4ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Apr 2015 11:20:31 +0200 Subject: uas: Add US_FL_MAX_SECTORS_240 flag The usb-storage driver sets max_sectors = 240 in its scsi-host template, for uas we do not want to do that for all devices, but testing has shown that some devices need it. This commit adds a US_FL_MAX_SECTORS_240 flag for such devices, and implements support for it in uas.c, while at it it also adds support for US_FL_MAX_SECTORS_64 to uas.c. Cc: stable@vger.kernel.org # 3.16 Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ drivers/usb/storage/uas.c | 10 +++++++++- drivers/usb/storage/usb.c | 6 +++++- include/linux/usb_usual.h | 2 ++ 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6befa9855c1..61ab1628a057 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3787,6 +3787,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes command, uas only); + g = MAX_SECTORS_240 (don't transfer more than + 240 sectors at a time, uas only); h = CAPACITY_HEURISTICS (decrease the reported device capacity by one sector if the number is odd); diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index c6109c111aab..6d3122afeed3 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -759,7 +759,10 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) static int uas_slave_alloc(struct scsi_device *sdev) { - sdev->hostdata = (void *)sdev->host->hostdata; + struct uas_dev_info *devinfo = + (struct uas_dev_info *)sdev->host->hostdata; + + sdev->hostdata = devinfo; /* USB has unusual DMA-alignment requirements: Although the * starting address of each scatter-gather element doesn't matter, @@ -778,6 +781,11 @@ static int uas_slave_alloc(struct scsi_device *sdev) */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); + if (devinfo->flags & US_FL_MAX_SECTORS_64) + blk_queue_max_hw_sectors(sdev->request_queue, 64); + else if (devinfo->flags & US_FL_MAX_SECTORS_240) + blk_queue_max_hw_sectors(sdev->request_queue, 240); + return 0; } diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index db6f6b5ec745..6c10c888f35f 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -479,7 +479,8 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | - US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES); + US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | + US_FL_MAX_SECTORS_240); p = quirks; while (*p) { @@ -520,6 +521,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'f': f |= US_FL_NO_REPORT_OPCODES; break; + case 'g': + f |= US_FL_MAX_SECTORS_240; + break; case 'h': f |= US_FL_CAPACITY_HEURISTICS; break; diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a7f2604c5f25..7f5f78bd15ad 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -77,6 +77,8 @@ /* Cannot handle ATA_12 or ATA_16 CDBs */ \ US_FLAG(NO_REPORT_OPCODES, 0x04000000) \ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ + US_FLAG(MAX_SECTORS_240, 0x08000000) \ + /* Sets max_sectors to 240 */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From b00f5c2dc01450bed9fed1a41a637fa917e03c5c Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Fri, 10 Apr 2015 15:13:05 +0200 Subject: tty: Re-add external interface for tty_set_termios() This is needed by Bluetooth hci_uart module to be able to change speed of Bluetooth controller and local UART. Signed-off-by: Frederic Danis Reviewed-by: Peter Hurley Cc: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 3 ++- include/linux/tty.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 632fc8152061..8e53fe469664 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -536,7 +536,7 @@ EXPORT_SYMBOL(tty_termios_hw_change); * Locking: termios_rwsem */ -static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) +int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; @@ -569,6 +569,7 @@ static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) up_write(&tty->termios_rwsem); return 0; } +EXPORT_SYMBOL_GPL(tty_set_termios); /** * set_termios - set termios values for a tty diff --git a/include/linux/tty.h b/include/linux/tty.h index 358a337af598..fe5623c9af71 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -491,6 +491,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); +extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); -- cgit v1.2.3 From 46c264daaaa569e24f8aba877d0fd8167c42a9a4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 28 Apr 2015 18:33:49 +0200 Subject: bridge/nl: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent. In fact, it is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: e5a55a898720 ("net: create generic bridge ops") Fixes: 815cccbf10b2 ("ixgbe: add setlink, getlink support to ixgbe and ixgbevf") CC: John Fastabend CC: Sathya Perla CC: Subbu Seetharaman CC: Ajit Khaparde CC: Jeff Kirsher CC: intel-wired-lan@lists.osuosl.org CC: Jiri Pirko CC: Scott Feldman CC: Stephen Hemminger CC: bridge@lists.linux-foundation.org Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 ++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/rocker/rocker.c | 5 +++-- include/linux/netdevice.h | 6 ++++-- include/linux/rtnetlink.h | 2 +- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 12 +++++++----- 9 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index fb0bc3c3620e..a6dcbf850c1f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4846,7 +4846,8 @@ err: } static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) + struct net_device *dev, u32 filter_mask, + int nlflags) { struct be_adapter *adapter = netdev_priv(dev); int status = 0; @@ -4868,7 +4869,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return ndo_dflt_bridge_getlink(skb, pid, seq, dev, hsw_mode == PORT_FWD_TYPE_VEPA ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB, - 0, 0); + 0, 0, nlflags); } #ifdef CONFIG_BE2NET_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 24481cd7e59a..a54c14491e3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8053,10 +8053,10 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, #ifdef HAVE_BRIDGE_FILTER static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 __always_unused filter_mask) + u32 __always_unused filter_mask, int nlflags) #else static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, int nlflags) #endif /* HAVE_BRIDGE_FILTER */ { struct i40e_netdev_priv *np = netdev_priv(dev); @@ -8078,7 +8078,8 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (!veb) return 0; - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode); + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, + nlflags); } #endif /* HAVE_BRIDGE_ATTRIBS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d3f4b0ceb3f7..5be12a00e1f4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8044,7 +8044,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask) + u32 filter_mask, int nlflags) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -8052,7 +8052,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, - adapter->bridge_mode, 0, 0); + adapter->bridge_mode, 0, 0, nlflags); } static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a570a60533be..ec251531bd9f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4176,14 +4176,15 @@ static int rocker_port_bridge_setlink(struct net_device *dev, static int rocker_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask) + u32 filter_mask, int nlflags) { struct rocker_port *rocker_port = netdev_priv(dev); u16 mode = BRIDGE_MODE_UNDEF; u32 mask = BR_LEARNING | BR_LEARNING_SYNC; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - rocker_port->brport_flags, mask); + rocker_port->brport_flags, mask, + nlflags); } static int rocker_port_get_phys_port_name(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dbad4d728b4b..1899c74a7127 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,7 +977,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - * struct net_device *dev, u32 filter_mask) + * struct net_device *dev, u32 filter_mask, + * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * @@ -1173,7 +1174,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, + int nlflags); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2da5d1081ad9..7b8e260c4a27 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -122,5 +122,5 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask); + u32 flags, u32 mask, int nlflags); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0e4ddb81610d..4b5c236998ff 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -394,7 +394,7 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) + struct net_device *dev, u32 filter_mask, int nlflags) { struct net_bridge_port *port = br_port_get_rtnl(dev); @@ -402,7 +402,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) return 0; - return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, filter_mask, dev); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6ca0251cb478..3362c29400f1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -828,7 +828,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, int nlflags); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 358d52a38533..666e0928ba40 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask) + u32 flags, u32 mask, int nlflags) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); - nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; @@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask) < 0) + skb, portid, seq, dev, filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask) < 0) + filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev) goto errout; } - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; -- cgit v1.2.3 From 7829fb09a2b4268b30dd9bc782fa5ebee278b137 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Apr 2015 04:13:52 +0200 Subject: lib: make memzero_explicit more robust against dead store elimination In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in case LTO would decide to inline memzero_explicit() and eventually find out it could be elimiated as dead store. While using barrier() works well for the case of gcc, recent efforts from LLVMLinux people suggest to use llvm as an alternative to gcc, and there, Stephan found in a simple stand-alone user space example that llvm could nevertheless optimize and thus elimitate the memset(). A similar issue has been observed in the referenced llvm bug report, which is regarded as not-a-bug. Based on some experiments, icc is a bit special on its own, while it doesn't seem to eliminate the memset(), it could do so with an own implementation, and then result in similar findings as with llvm. The fix in this patch now works for all three compilers (also tested with more aggressive optimization levels). Arguably, in the current kernel tree it's more of a theoretical issue, but imho, it's better to be pedantic about it. It's clearly visible with gcc/llvm though, with the below code: if we would have used barrier() only here, llvm would have omitted clearing, not so with barrier_data() variant: static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); barrier_data(s); } int main(void) { char buff[20]; memzero_explicit(buff, sizeof(buff)); return 0; } $ gcc -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x0000000000400400 <+0>: lea -0x28(%rsp),%rax 0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp) 0x000000000040040e <+14>: movq $0x0,-0x20(%rsp) 0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp) 0x000000000040041f <+31>: xor %eax,%eax 0x0000000000400421 <+33>: retq End of assembler dump. $ clang -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x00000000004004f0 <+0>: xorps %xmm0,%xmm0 0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp) 0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp) 0x0000000000400500 <+16>: lea -0x18(%rsp),%rax 0x0000000000400505 <+21>: xor %eax,%eax 0x0000000000400507 <+23>: retq End of assembler dump. As gcc, clang, but also icc defines __GNUC__, it's sufficient to define this in compiler-gcc.h only to be picked up. For a fallback or otherwise unsupported compiler, we define it as a barrier. Similarly, for ecc which does not support gcc inline asm. Reference: https://llvm.org/bugs/show_bug.cgi?id=15495 Reported-by: Stephan Mueller Tested-by: Stephan Mueller Signed-off-by: Daniel Borkmann Cc: Theodore Ts'o Cc: Stephan Mueller Cc: Hannes Frederic Sowa Cc: mancha security Cc: Mark Charlebois Cc: Behan Webster Signed-off-by: Herbert Xu --- include/linux/compiler-gcc.h | 16 +++++++++++++++- include/linux/compiler-intel.h | 3 +++ include/linux/compiler.h | 4 ++++ lib/string.c | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cdf13ca7cac3..371e560d13cf 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -9,10 +9,24 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) - /* Optimization barrier */ + /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proofed that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") /* * This macro obfuscates arithmetic on a variable address so that gcc diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index ba147a1727e6..0c9a2f2c2802 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,9 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier_data(ptr) barrier() + #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0e41ca0e5927..867722591be2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -169,6 +169,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +#ifndef barrier_data +# define barrier_data(ptr) barrier() +#endif + /* Unreachable code */ #ifndef unreachable # define unreachable() do { } while (1) diff --git a/lib/string.c b/lib/string.c index a5792019193c..bb3d4b6993c4 100644 --- a/lib/string.c +++ b/lib/string.c @@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset); void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - barrier(); + barrier_data(s); } EXPORT_SYMBOL(memzero_explicit); -- cgit v1.2.3 From b2387ddcced8de3e6471a2fb16a409577063016f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 1 May 2015 09:59:44 -0700 Subject: blk-mq: fix FUA request hang When a FUA request enters its DATA stage of flush pipeline, the request is added to mq requeue list, the request will then be added to ctx->rq_list. blk_mq_attempt_merge() might merge the request with a bio. Later when the request is finished the flush pipeline, the request->__data_len is 0. Then I only saw the bio gets endio called, the original request never finish. Adding REQ_FLUSH_SEQ into REQ_NOMERGE_FLAGS looks an easy fix. stable: 3.15+ Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e35ea5f..b7299febc4b4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,7 +220,7 @@ enum rq_flag_bits { /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_THROTTLED (1ULL << __REQ_THROTTLED) -- cgit v1.2.3 From 05836c378c7af9527b98a83746f32c7289a5f3c8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 5 May 2015 16:23:57 -0700 Subject: util_macros.h: have array pointer point to array of constants Using the new find_closest() macro can result in the following sparse warnings. drivers/hwmon/lm85.c:194:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:194:16: expected int *__fc_a drivers/hwmon/lm85.c:194:16: got int static const [toplevel] * drivers/hwmon/lm85.c:210:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:210:16: expected int *__fc_a drivers/hwmon/lm85.c:210:16: got int const *map This is because the array passed to find_closest() will typically be declared as array of constants, but the macro declares a non-constant pointer to it. Signed-off-by: Guenter Roeck Cc: Bartosz Golaszewski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/util_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index d5f4fb69dba3..f9b2ce58039b 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -5,7 +5,7 @@ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ typeof(x) __fc_x = (x); \ - typeof(*a) *__fc_a = (a); \ + typeof(*a) const *__fc_a = (a); \ for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ __fc_a[__fc_i + 1], 2)) \ -- cgit v1.2.3 From d8fd150fe3935e1692bf57c66691e17409ebb9c1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2015 16:24:00 -0700 Subject: nilfs2: fix sanity check of btree level in nilfs_btree_root_broken() The range check for b-tree level parameter in nilfs_btree_root_broken() is wrong; it accepts the case of "level == NILFS_BTREE_LEVEL_MAX" even though the level is limited to values in the range of 0 to (NILFS_BTREE_LEVEL_MAX - 1). Since the level parameter is read from storage device and used to index nilfs_btree_path array whose element count is NILFS_BTREE_LEVEL_MAX, it can cause memory overrun during btree operations if the boundary value is set to the level parameter on device. This fixes the broken sanity check and adds a comment to clarify that the upper bound NILFS_BTREE_LEVEL_MAX is exclusive. Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 2 +- include/linux/nilfs2_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 059f37137f9a..919fd5bb14a8 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || - level > NILFS_BTREE_LEVEL_MAX || + level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index ff3fea3194c6..9abb763e4b86 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -460,7 +460,7 @@ struct nilfs_btree_node { /* level */ #define NILFS_BTREE_LEVEL_DATA 0 #define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 +#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ /** * struct nilfs_palloc_group_desc - block group descriptor -- cgit v1.2.3 From ac01ce1410fc2c7b5f3af5e9c972e6a412eee54f Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 29 Apr 2015 16:18:46 +0100 Subject: tracing: Make ftrace_print_array_seq compute buf_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only caller to this function (__print_array) was getting it wrong by passing the array length instead of buffer length. As the element size was already being passed for other reasons it seems reasonable to push the calculation of buffer length into the function. Link: http://lkml.kernel.org/r/1430320727-14582-1-git-send-email-alex.bennee@linaro.org Signed-off-by: Alex Bennée Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace_output.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46e83c2156c6..f9ecf63d47f1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -46,7 +46,7 @@ const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); const char *ftrace_print_array_seq(struct trace_seq *p, - const void *buf, int buf_len, + const void *buf, int count, size_t el_size); struct trace_iterator; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 692bf7184c8c..25a086bcb700 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) EXPORT_SYMBOL(ftrace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; void *ptr = (void *)buf; + size_t buf_len = count * el_size; trace_seq_putc(p, '{'); -- cgit v1.2.3