From 4b8164b91d9fdff4dbac0a742d076bdff7fda21b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 31 Jan 2015 20:08:47 -0500
Subject: new helper: dup_iter()

Copy iter and kmemdup the underlying array for the copy.  Returns
a pointer to result of kmemdup() to be kfree()'d later.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 07a022641996..71880299ed48 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
 	return i->count;
-- 
cgit v1.2.3


From 846cd66788b11105a62785078360c8854aa98310 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 18 Feb 2015 11:38:06 +0100
Subject: net: Initialize all members in skb_gro_remcsum_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

skb_gro_remcsum_init() initializes the gro_remcsum.delta member only,
leading to compiler warnings about a possibly uninitialized
gro_remcsum.offset member:

drivers/net/vxlan.c: In function ‘vxlan_gro_receive’:
drivers/net/vxlan.c:602: warning: ‘grc.offset’ may be used uninitialized in this function
net/ipv4/fou.c: In function ‘gue_gro_receive’:
net/ipv4/fou.c:262: warning: ‘grc.offset’ may be used uninitialized in this function

While these are harmless for now:
  - skb_gro_remcsum_process() sets offset before changing delta,
  - skb_gro_remcsum_cleanup() checks if delta is non-zero before
    accessing offset,
it's safer to let the initialization function initialize all members.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5897b4ea5a3f..429d1790a27e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2342,6 +2342,7 @@ struct gro_remcsum {
 
 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
 {
+	grc->offset = 0;
 	grc->delta = 0;
 }
 
-- 
cgit v1.2.3


From b9ebafbe8cfeeddec881504c446cccd0d87a51b6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 20 Feb 2015 06:48:57 -0800
Subject: rhashtable: ensure cache line alignment on bucket_table

struct bucket_table contains mostly read fields :

size, locks_mask, locks.

Make sure these are not sharing a cache line with buckets[]

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 58851275fed9..cb2104be2135 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -54,10 +54,11 @@ struct rhash_head {
  * @buckets: size * hash buckets
  */
 struct bucket_table {
-	size_t				size;
-	unsigned int			locks_mask;
-	spinlock_t			*locks;
-	struct rhash_head __rcu		*buckets[];
+	size_t			size;
+	unsigned int		locks_mask;
+	spinlock_t		*locks;
+
+	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-- 
cgit v1.2.3


From 737eb0301f296d55c22350c6968ff1ef51bacb5f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 20 Feb 2015 14:53:46 +0000
Subject: genirq / PM: better describe IRQF_NO_SUSPEND semantics

The IRQF_NO_SUSPEND flag is intended to be used for interrupts required
to be enabled during the suspend-resume cycle. This mostly consists of
IPIs and timer interrupts, potentially including chained irqchip
interrupts if these are necessary to handle timers or IPIs. If an
interrupt does not fall into one of the aforementioned categories,
requesting it with IRQF_NO_SUSPEND is likely incorrect.

Using IRQF_NO_SUSPEND does not guarantee that the interrupt can wake the
system from a suspended state. For an interrupt to be able to trigger a
wakeup, it may be necessary to program various components of the system.
In these cases it is necessary to use {enable,disabled}_irq_wake.

Unfortunately, several drivers assume that IRQF_NO_SUSPEND ensures that
an IRQ can wake up the system, and the documentation can be read
ambiguously w.r.t. this property.

This patch updates the documentation regarding IRQF_NO_SUSPEND to make
this caveat explicit, hopefully making future misuse rarer. Cleanup of
existing misuse will occur as part of later patch series.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/suspend-and-interrupts.txt | 6 ++++--
 include/linux/interrupt.h                      | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt
index 2f9c5a5fcb25..50493c9284b4 100644
--- a/Documentation/power/suspend-and-interrupts.txt
+++ b/Documentation/power/suspend-and-interrupts.txt
@@ -40,8 +40,10 @@ but also to IPIs and to some other special-purpose interrupts.
 
 The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
 requesting a special-purpose interrupt.  It causes suspend_device_irqs() to
-leave the corresponding IRQ enabled so as to allow the interrupt to work all
-the time as expected.
+leave the corresponding IRQ enabled so as to allow the interrupt to work as
+expected during the suspend-resume cycle, but does not guarantee that the
+interrupt will wake the system from a suspended state -- for such cases it is
+necessary to use enable_irq_wake().
 
 Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one
 user of it.  Thus, if the IRQ is shared, all of the interrupt handlers installed
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d9b05b5bf8c7..606771c7cac2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -52,7 +52,9 @@
  * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
  *                Used by threaded interrupts which need to keep the
  *                irq line disabled until the threaded handler has been run.
- * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
+ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend.  Does not guarantee
+ *                   that this interrupt will wake the system from a suspended
+ *                   state.  See Documentation/power/suspend-and-interrupts.txt
  * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
-- 
cgit v1.2.3


From bc4b1f486fe69b86769e07c8edce472327a8462b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Sun, 15 Feb 2015 11:57:53 +0700
Subject: Revert "USB: serial: make bulk_out_size a lower limit"

This reverts commit 5083fd7bdfe6760577235a724cf6dccae13652c2.

A bulk-out size smaller than the end-point size is indeed valid. The
offending commit broke the usb-debug driver for EHCI debug devices,
which use 8-byte buffers.

Fixes: 5083fd7bdfe6 ("USB: serial: make bulk_out_size a lower limit")
Reported-by: "Li, Elvin" <elvin.li@intel.com>
Cc: stable <stable@vger.kernel.org>	# v3.15
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/usb-serial.c | 5 +++--
 include/linux/usb/serial.h      | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 475723c006f9..19842370a07f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -940,8 +940,9 @@ static int usb_serial_probe(struct usb_interface *interface,
 		port = serial->port[i];
 		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
-		buffer_size = max_t(int, serial->type->bulk_out_size,
-						usb_endpoint_maxp(endpoint));
+		buffer_size = serial->type->bulk_out_size;
+		if (!buffer_size)
+			buffer_size = usb_endpoint_maxp(endpoint);
 		port->bulk_out_size = buffer_size;
 		port->bulk_out_endpointAddress = endpoint->bEndpointAddress;
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 9bb547c7bce7..704a1ab8240c 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -190,8 +190,7 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data)
  * @num_ports: the number of different ports this device will have.
  * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer
  *	(0 = end-point size)
- * @bulk_out_size: minimum number of bytes to allocate for bulk-out buffer
- *	(0 = end-point size)
+ * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size)
  * @calc_num_ports: pointer to a function to determine how many ports this
  *	device has dynamically.  It will be called after the probe()
  *	callback is called, but before attach()
-- 
cgit v1.2.3


From 09ee96b21456883e108c3b00597bb37ec512151b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 26 Feb 2015 11:41:28 -0500
Subject: dm snapshot: suspend merging snapshot when doing exception handover

The "dm snapshot: suspend origin when doing exception handover" commit
fixed a exception store handover bug associated with pending exceptions
to the "snapshot-origin" target.

However, a similar problem exists in snapshot merging.  When snapshot
merging is in progress, we use the target "snapshot-merge" instead of
"snapshot-origin".  Consequently, during exception store handover, we
must find the snapshot-merge target and suspend its associated
mapped_device.

To avoid lockdep warnings, the target must be suspended and resumed
without holding _origins_lock.

Introduce a dm_hold() function that grabs a reference on a
mapped_device, but unlike dm_get(), it doesn't crash if the device has
the DMF_FREEING flag set, it returns an error in this case.

In snapshot_resume() we grab the reference to the origin device using
dm_hold() while holding _origins_lock (_origins_lock guarantees that the
device won't disappear).  Then we release _origins_lock, suspend the
device and grab _origins_lock again.

NOTE to stable@ people:
When backporting to kernels 3.18 and older, use dm_internal_suspend and
dm_internal_resume instead of dm_internal_suspend_fast and
dm_internal_resume_fast.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/md/dm-snap.c          | 35 +++++++++++++++++++++++++++++------
 drivers/md/dm.c               | 13 +++++++++++++
 include/linux/device-mapper.h |  1 +
 3 files changed, 43 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c2bf822bad6f..f83a0f3fc365 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1888,20 +1888,39 @@ static int snapshot_preresume(struct dm_target *ti)
 static void snapshot_resume(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
-	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
 	struct dm_origin *o;
 	struct mapped_device *origin_md = NULL;
+	bool must_restart_merging = false;
 
 	down_read(&_origins_lock);
 
 	o = __lookup_dm_origin(s->origin->bdev);
 	if (o)
 		origin_md = dm_table_get_md(o->ti->table);
+	if (!origin_md) {
+		(void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
+		if (snap_merging)
+			origin_md = dm_table_get_md(snap_merging->ti->table);
+	}
 	if (origin_md == dm_table_get_md(ti->table))
 		origin_md = NULL;
+	if (origin_md) {
+		if (dm_hold(origin_md))
+			origin_md = NULL;
+	}
 
-	if (origin_md)
+	up_read(&_origins_lock);
+
+	if (origin_md) {
 		dm_internal_suspend_fast(origin_md);
+		if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
+			must_restart_merging = true;
+			stop_merge(snap_merging);
+		}
+	}
+
+	down_read(&_origins_lock);
 
 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
 	if (snap_src && snap_dest) {
@@ -1912,11 +1931,15 @@ static void snapshot_resume(struct dm_target *ti)
 		up_write(&snap_src->lock);
 	}
 
-	if (origin_md)
-		dm_internal_resume_fast(origin_md);
-
 	up_read(&_origins_lock);
 
+	if (origin_md) {
+		if (must_restart_merging)
+			start_merge(snap_merging);
+		dm_internal_resume_fast(origin_md);
+		dm_put(origin_md);
+	}
+
 	/* Now we have correct chunk size, reregister */
 	reregister_snapshot(s);
 
@@ -2360,7 +2383,7 @@ static struct target_type snapshot_target = {
 
 static struct target_type merge_target = {
 	.name    = dm_snapshot_merge_target_name,
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6e2b2e97abe9..9b641b38b857 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2616,6 +2616,19 @@ void dm_get(struct mapped_device *md)
 	BUG_ON(test_bit(DMF_FREEING, &md->flags));
 }
 
+int dm_hold(struct mapped_device *md)
+{
+	spin_lock(&_minor_lock);
+	if (test_bit(DMF_FREEING, &md->flags)) {
+		spin_unlock(&_minor_lock);
+		return -EBUSY;
+	}
+	dm_get(md);
+	spin_unlock(&_minor_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_hold);
+
 const char *dm_device_name(struct mapped_device *md)
 {
 	return md->name;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 2646aed1d3fe..fd23978d93fe 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -375,6 +375,7 @@ int dm_create(int minor, struct mapped_device **md);
  */
 struct mapped_device *dm_get_md(dev_t dev);
 void dm_get(struct mapped_device *md);
+int dm_hold(struct mapped_device *md);
 void dm_put(struct mapped_device *md);
 
 /*
-- 
cgit v1.2.3


From 4c4b52d9b2df45e8216d3e30b5452e4a364d2cac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 25 Feb 2015 16:31:54 +0100
Subject: rhashtable: remove indirection for grow/shrink decision functions

Currently, all real users of rhashtable default their grow and shrink
decision functions to rht_grow_above_75() and rht_shrink_below_30(),
so that there's currently no need to have this explicitly selectable.

It can/should be generic and private inside rhashtable until a real
use case pops up. Since we can make this private, we'll save us this
additional indirection layer and can improve insertion/deletion time
as well.

Reference: http://patchwork.ozlabs.org/patch/443040/
Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 13 -----------
 lib/rhashtable.c           | 56 ++++++++++++++--------------------------------
 lib/test_rhashtable.c      |  1 +
 net/netfilter/nft_hash.c   |  2 --
 net/netlink/af_netlink.c   |  2 --
 net/tipc/socket.c          |  2 --
 6 files changed, 18 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index cb2104be2135..d438eeb08bff 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,12 +79,6 @@ struct rhashtable;
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
- * @grow_decision: If defined, may return true if table should expand
- * @shrink_decision: If defined, may return true if table should shrink
- *
- * Note: when implementing the grow and shrink decision function, min/max
- * shift must be enforced, otherwise, resizing watermarks they set may be
- * useless.
  */
 struct rhashtable_params {
 	size_t			nelem_hint;
@@ -98,10 +92,6 @@ struct rhashtable_params {
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
-	bool			(*grow_decision)(const struct rhashtable *ht,
-						 size_t new_size);
-	bool			(*shrink_decision)(const struct rhashtable *ht,
-						   size_t new_size);
 };
 
 /**
@@ -193,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
-
 int rhashtable_expand(struct rhashtable *ht);
 int rhashtable_shrink(struct rhashtable *ht);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index bcf119bfdef4..090641db4c0d 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -247,26 +247,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
 	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
 }
-EXPORT_SYMBOL_GPL(rht_grow_above_75);
 
 /**
  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
 	       (atomic_read(&ht->shift) > ht->p.min_shift);
 }
-EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
 static void lock_buckets(struct bucket_table *new_tbl,
 			 struct bucket_table *old_tbl, unsigned int hash)
@@ -528,40 +526,19 @@ static void rht_deferred_worker(struct work_struct *work)
 	list_for_each_entry(walker, &ht->walkers, list)
 		walker->resize = true;
 
-	if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl->size))
 		rhashtable_expand(ht);
-	else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
+	else if (rht_shrink_below_30(ht, tbl->size))
 		rhashtable_shrink(ht);
-
 unlock:
 	mutex_unlock(&ht->mutex);
 }
 
-static void rhashtable_probe_expand(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.grow_decision &&
-	    ht->p.grow_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
-static void rhashtable_probe_shrink(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.shrink_decision &&
-	    ht->p.shrink_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
 static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				struct bucket_table *tbl, u32 hash)
+				struct bucket_table *tbl,
+				const struct bucket_table *old_tbl, u32 hash)
 {
+	bool no_resize_running = tbl == old_tbl;
 	struct rhash_head *head;
 
 	hash = rht_bucket_index(tbl, hash);
@@ -577,8 +554,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
-
-	rhashtable_probe_expand(ht);
+	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+		schedule_work(&ht->run_work);
 }
 
 /**
@@ -608,7 +585,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
 	lock_buckets(tbl, old_tbl, hash);
-	__rhashtable_insert(ht, obj, tbl, hash);
+	__rhashtable_insert(ht, obj, tbl, old_tbl, hash);
 	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
@@ -690,8 +667,11 @@ found:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
+		bool no_resize_running = new_tbl == old_tbl;
+
 		atomic_dec(&ht->nelems);
-		rhashtable_probe_shrink(ht);
+		if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
+			schedule_work(&ht->run_work);
 	}
 
 	rcu_read_unlock();
@@ -861,7 +841,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 		goto exit;
 	}
 
-	__rhashtable_insert(ht, obj, new_tbl, new_hash);
+	__rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
 
 exit:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
@@ -1123,8 +1103,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (!ht->p.hash_rnd)
 		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		INIT_WORK(&ht->run_work, rht_deferred_worker);
+	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
 	return 0;
 }
@@ -1142,8 +1121,7 @@ void rhashtable_destroy(struct rhashtable *ht)
 {
 	ht->being_destroyed = true;
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		cancel_work_sync(&ht->run_work);
+	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
 	bucket_table_free(rht_dereference(ht->tbl, ht));
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f9e9d734446a..67c7593d1dd6 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -201,6 +201,7 @@ static int __init test_rht_init(void)
 		.key_offset = offsetof(struct test_obj, value),
 		.key_len = sizeof(int),
 		.hashfn = jhash,
+		.max_shift = 1, /* we expand/shrink manually here */
 		.nulls_base = (3U << RHT_BASE_SHIFT),
 	};
 	int err;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 61e6c407476a..c82df0a48fcd 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -192,8 +192,6 @@ static int nft_hash_init(const struct nft_set *set,
 		.key_offset = offsetof(struct nft_hash_elem, key),
 		.key_len = set->klen,
 		.hashfn = jhash,
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(priv, &params);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2702673f0f23..05919bf3f670 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void)
 		.key_len = sizeof(u32), /* portid */
 		.hashfn = jhash,
 		.max_shift = 16, /* 64K */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	if (err != 0)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f73e975af80b..b4d4467d0bb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net)
 		.hashfn = jhash,
 		.max_shift = 20, /* 1M */
 		.min_shift = 8,  /* 256 */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(&tn->sk_rht, &rht_params);
-- 
cgit v1.2.3


From 140e049c64ce848392adbf4678983ecc76888dde Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 17:42:42 -0500
Subject: NFS: Add a helper to set attribute barriers

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 16 ++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..b0cbc1ba82da 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1260,6 +1260,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 }
 EXPORT_SYMBOL_GPL(nfs_fattr_init);
 
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+	fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 struct nfs_fattr *nfs_alloc_fattr(void)
 {
 	struct nfs_fattr *fattr;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2f77e0c651c8..3a4ffb5856cd 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -369,6 +369,7 @@ extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ct
 extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
+extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
 extern unsigned long nfs_inc_attr_generation_counter(void);
 
 extern struct nfs_fattr *nfs_alloc_fattr(void);
-- 
cgit v1.2.3


From f044636d972246d451e06226cc1675d5da389762 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 16:09:04 -0500
Subject: NFS: Add attribute update barriers to nfs_setattr_update_inode()

Ensure that other operations which raced with our setattr RPC call
cannot revert the file attribute changes that were made on the server.
To do so, we artificially bump the attribute generation counter on
the inode so that all calls to nfs_fattr_init() that precede ours
will be dropped.

The motivation for the patch came from Chuck Lever's reports of readaheads
racing with truncate operations and causing the file size to be reverted.

Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 17 ++++++++++++-----
 fs/nfs/nfs3proc.c      |  2 +-
 fs/nfs/nfs4proc.c      |  6 +++---
 fs/nfs/proc.c          |  2 +-
 include/linux/nfs_fs.h |  2 +-
 5 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b0cbc1ba82da..3a2d127de499 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
  * This is a copy of the common vmtruncate, but with the locking
  * corrected to take into account the fact that NFS requires
  * inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 	if (err)
 		goto out;
 
-	spin_lock(&inode->i_lock);
 	i_size_write(inode, offset);
 	/* Optimisation */
 	if (offset == 0)
 		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
-	spin_unlock(&inode->i_lock);
 
+	spin_unlock(&inode->i_lock);
 	truncate_pagecache(inode, offset);
+	spin_lock(&inode->i_lock);
 out:
 	return err;
 }
@@ -585,10 +586,15 @@ out:
  * Note: we do this in the *proc.c in order to ensure that
  *       it works for things like exclusive creates too.
  */
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+		struct nfs_fattr *fattr)
 {
+	/* Barrier: bump the attribute generation count. */
+	nfs_fattr_set_barrier(fattr);
+
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->attr_gencount = fattr->gencount;
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
-		spin_lock(&inode->i_lock);
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
 			int mode = attr->ia_mode & S_IALLUGO;
 			mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 			inode->i_gid = attr->ia_gid;
 		nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL);
-		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
+	nfs_update_inode(inode, fattr);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..11109a137c0c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4e41340e957d..c499e02a58ca 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2416,8 +2416,8 @@ static int _nfs4_do_open(struct inode *dir,
 				opendata->o_res.f_attr, sattr,
 				state, label, olabel);
 		if (status == 0) {
-			nfs_setattr_update_inode(state->inode, sattr);
-			nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+			nfs_setattr_update_inode(state->inode, sattr,
+					opendata->o_res.f_attr);
 			nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
 		}
 	}
@@ -3291,7 +3291,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
 	if (status == 0) {
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 		nfs_setsecurity(inode, fattr, label);
 	}
 	nfs4_label_free(label);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..6202bc0f11bb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 3a4ffb5856cd..f26e64e0aff8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -356,7 +356,7 @@ extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *ino
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
-extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
+extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 				struct nfs4_label *label);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
-- 
cgit v1.2.3


From a08a8cd375db9769588257e7782f6b6b68561b88 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 17:36:09 -0500
Subject: NFS: Add attribute update barriers to NFS writebacks

Ensure that other operations that race with our write RPC calls
cannot revert the file size updates that were made on the server.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 25 ++++++++++++++++++++++---
 fs/nfs/internal.h      |  1 +
 fs/nfs/nfs3proc.c      |  2 +-
 fs/nfs/nfs4proc.c      |  2 +-
 fs/nfs/proc.c          |  4 +---
 fs/nfs/write.c         | 30 ++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 7 files changed, 57 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 299bf7171a4d..ff9a6795da46 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1491,7 +1491,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 
 /**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1501,11 +1501,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
  *
  * This function is mainly designed to be used by the ->write_done() functions.
  */
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
 	int status;
 
-	spin_lock(&inode->i_lock);
 	/* Don't do a WCC update if these attributes are already stale */
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
 			!nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1537,6 +1536,26 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 	}
 out_noforce:
 	status = nfs_post_op_update_inode_locked(inode, fattr);
+	return status;
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+	int status;
+
+	spin_lock(&inode->i_lock);
+	status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
 	spin_unlock(&inode->i_lock);
 	return status;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			     struct nfs_commit_info *cinfo,
 			     u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 11109a137c0c..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c499e02a58ca..b022e64b76a5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4237,7 +4237,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), hdr->timestamp);
-		nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+		nfs_writeback_update_inode(hdr);
 	}
 	return 0;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 6202bc0f11bb..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = hdr->inode;
-
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..849ed784d6ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
 	return 0;
 }
 
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_pgio_args *argp = &hdr->args;
+	struct nfs_pgio_res *resp = &hdr->res;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+		return;
+	if (argp->offset + resp->count != fattr->size)
+		return;
+	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+		return;
+	/* Set attribute barrier */
+	nfs_fattr_set_barrier(fattr);
+}
+
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+	struct nfs_fattr *fattr = hdr->res.fattr;
+	struct inode *inode = hdr->inode;
+
+	if (fattr == NULL)
+		return;
+	spin_lock(&inode->i_lock);
+	nfs_writeback_check_extend(hdr, fattr);
+	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
+
 /*
  * This function is called when the WRITE call is complete.
  */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f26e64e0aff8..59b1516b9fd4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -343,6 +343,7 @@ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
+extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
-- 
cgit v1.2.3


From f5956fafb00afab474c3886b6297f9b5e7aff722 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Mon, 2 Mar 2015 18:22:15 +0200
Subject: net/mlx4_core: Fix wrong mask and error flow for the update-qp
 command

The bit mask for currently supported driver features (MLX4_UPDATE_QP_SUPPORTED_ATTRS)
of the update-qp command was defined twice (using enum value and pre-processor
define directive) and wrong.

The return value of the call to mlx4_update_qp() from within the SRIOV
resource-tracker was wrongly voided down.

Fix both issues.

issue: none
Fixes: 09e05c3f78e9 ('net/mlx4: Set vlan stripping policy by the right command')
Fixes: ce8d9e0d6746 ('net/mlx4_core: Add UPDATE_QP SRIOV wrapper support')
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/qp.c               | 1 -
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 9 ++++++---
 include/linux/mlx4/qp.h                               | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2bb8553bd905..eda29dbbfcd2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -412,7 +412,6 @@ err_icm:
 
 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
 
-#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 486e3d26cd4a..d97ca88c55b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -713,7 +713,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 	struct mlx4_vport_oper_state *vp_oper;
 	struct mlx4_priv *priv;
 	u32 qp_type;
-	int port;
+	int port, err = 0;
 
 	port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
 	priv = mlx4_priv(dev);
@@ -738,7 +738,9 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 			} else {
 				struct mlx4_update_qp_params params = {.flags = 0};
 
-				mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+				err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+				if (err)
+					goto out;
 			}
 		}
 
@@ -773,7 +775,8 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 		qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
 		qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
 	}
-	return 0;
+out:
+	return err;
 }
 
 static int mpt_mask(struct mlx4_dev *dev)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 2bbc62aa818a..551f85456c11 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -427,7 +427,7 @@ struct mlx4_wqe_inline_seg {
 
 enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_SMAC		= 1 << 0,
-	MLX4_UPDATE_QP_VSD		= 1 << 2,
+	MLX4_UPDATE_QP_VSD		= 1 << 1,
 	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 2) - 1
 };
 
-- 
cgit v1.2.3


From c6331ba3d2d68758f36dbc3e09e648d312c24d97 Mon Sep 17 00:00:00 2001
From: Marcin Bis <marcin@bis.org.pl>
Date: Sun, 1 Mar 2015 13:49:32 +0100
Subject: spi: fix a typo in comment.

alway -> always

Signed-off-by: Marcin Bis <marcin@bis.org.pl>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index ed9489d893a4..856d34dde79b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -649,7 +649,7 @@ struct spi_transfer {
  * sequence completes.  On some systems, many such sequences can execute as
  * as single programmed DMA transfer.  On all systems, these messages are
  * queued, and might complete after transactions to other devices.  Messages
- * sent to a given spi_device are alway executed in FIFO order.
+ * sent to a given spi_device are always executed in FIFO order.
  *
  * The code that submits an spi_message (and its spi_transfers)
  * to the lower layers is responsible for managing its memory.
-- 
cgit v1.2.3


From 874f946376de57c8d6230b30ad71f742883fee3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 2 Mar 2015 23:32:08 -0500
Subject: NFS: Fix a regression in the read() syscall

When invalidating the page cache for a regular file, we want to first
sync all dirty data to disk and then call invalidate_inode_pages2().
The latter relies on nfs_launder_page() and nfs_release_page() to deal
respectively with dirty pages, and unstable written pages.

When commit 9590544694bec ("NFS: avoid deadlocks with loop-back mounted
NFS filesystems.") changed the behaviour of nfs_release_page(), then it
made it possible for invalidate_inode_pages2() to fail with an EBUSY.
Unfortunately, that error is then propagated back to read().

Let's therefore work around the problem for now by protecting the call
to sync the data and invalidate_inode_pages2() so that they are atomic
w.r.t. the addition of new writes.
Later on, we can revisit whether or not we still need nfs_launder_page()
and nfs_release_page().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c          |  4 ++--
 fs/nfs/inode.c         | 37 ++++++++++++++++++++++++++++++++++---
 include/linux/nfs_fs.h |  1 +
 3 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c045c7169fa0..41963ffca597 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
-	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
@@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_mapping(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5026c44a98e1..8edb7d049565 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1067,11 +1067,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 }
 
 /**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
  */
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+		struct address_space *mapping,
+		bool may_lock)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	unsigned long *bitlock = &nfsi->flags;
@@ -1120,7 +1123,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
 	trace_nfs_invalidate_mapping_enter(inode);
-	ret = nfs_invalidate_mapping(inode, mapping);
+	if (may_lock) {
+		mutex_lock(&inode->i_mutex);
+		ret = nfs_invalidate_mapping(inode, mapping);
+		mutex_unlock(&inode->i_mutex);
+	} else
+		ret = nfs_invalidate_mapping(inode, mapping);
 	trace_nfs_invalidate_mapping_exit(inode, ret);
 
 	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1130,6 +1138,29 @@ out:
 	return ret;
 }
 
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, false);
+}
+
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, true);
+}
+
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 59b1516b9fd4..b01ccf371fdc 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -356,6 +356,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
+extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-- 
cgit v1.2.3


From 17f480342026e54000731acaa69bf32787ce46cb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 27 Feb 2015 00:07:55 +0100
Subject: genirq / PM: Add flag for shared NO_SUSPEND interrupt lines

It currently is required that all users of NO_SUSPEND interrupt
lines pass the IRQF_NO_SUSPEND flag when requesting the IRQ or the
WARN_ON_ONCE() in irq_pm_install_action() will trigger.  That is
done to warn about situations in which unprepared interrupt handlers
may be run unnecessarily for suspended devices and may attempt to
access those devices by mistake.  However, it may cause drivers
that have no technical reasons for using IRQF_NO_SUSPEND to set
that flag just because they happen to share the interrupt line
with something like a timer.

Moreover, the generic handling of wakeup interrupts introduced by
commit 9ce7a25849e8 (genirq: Simplify wakeup mechanism) only works
for IRQs without any NO_SUSPEND users, so the drivers of wakeup
devices needing to use shared NO_SUSPEND interrupt lines for
signaling system wakeup generally have to detect wakeup in their
interrupt handlers.  Thus if they happen to share an interrupt line
with a NO_SUSPEND user, they also need to request that their
interrupt handlers be run after suspend_device_irqs().

In both cases the reason for using IRQF_NO_SUSPEND is not because
the driver in question has a genuine need to run its interrupt
handler after suspend_device_irqs(), but because it happens to
share the line with some other NO_SUSPEND user.  Otherwise, the
driver would do without IRQF_NO_SUSPEND just fine.

To make it possible to specify that condition explicitly, introduce
a new IRQ action handler flag for shared IRQs, IRQF_COND_SUSPEND,
that, when set, will indicate to the IRQ core that the interrupt
user is generally fine with suspending the IRQ, but it also can
tolerate handler invocations after suspend_device_irqs() and, in
particular, it is capable of detecting system wakeup and triggering
it as appropriate from its interrupt handler.

That will allow us to work around a problem with a shared timer
interrupt line on at91 platforms.

Link: http://marc.info/?l=linux-kernel&m=142252777602084&w=2
Link: http://marc.info/?t=142252775300011&r=1&w=2
Link: https://lkml.org/lkml/2014/12/15/552
Reported-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
 include/linux/interrupt.h | 5 +++++
 include/linux/irqdesc.h   | 1 +
 kernel/irq/manage.c       | 7 ++++++-
 kernel/irq/pm.c           | 7 ++++++-
 4 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 606771c7cac2..2e88580194f0 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -59,6 +59,10 @@
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
  *                resume time.
+ * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this
+ *                interrupt handler after suspending interrupts. For system
+ *                wakeup devices users need to implement wakeup detection in
+ *                their interrupt handlers.
  */
 #define IRQF_DISABLED		0x00000020
 #define IRQF_SHARED		0x00000080
@@ -72,6 +76,7 @@
 #define IRQF_FORCE_RESUME	0x00008000
 #define IRQF_NO_THREAD		0x00010000
 #define IRQF_EARLY_RESUME	0x00020000
+#define IRQF_COND_SUSPEND	0x00040000
 
 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
 
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index faf433af425e..dd1109fb241e 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -78,6 +78,7 @@ struct irq_desc {
 #ifdef CONFIG_PM_SLEEP
 	unsigned int		nr_actions;
 	unsigned int		no_suspend_depth;
+	unsigned int		cond_suspend_depth;
 	unsigned int		force_resume_depth;
 #endif
 #ifdef CONFIG_PROC_FS
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 196a06fbc122..886d09e691d5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	 * otherwise we'll have trouble later trying to figure out
 	 * which interrupt is which (messes up the interrupt freeing
 	 * logic etc).
+	 *
+	 * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
+	 * it cannot be set along with IRQF_NO_SUSPEND.
 	 */
-	if ((irqflags & IRQF_SHARED) && !dev_id)
+	if (((irqflags & IRQF_SHARED) && !dev_id) ||
+	    (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
+	    ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
 		return -EINVAL;
 
 	desc = irq_to_desc(irq);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 3ca532592704..5204a6d1b985 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
 
 	if (action->flags & IRQF_NO_SUSPEND)
 		desc->no_suspend_depth++;
+	else if (action->flags & IRQF_COND_SUSPEND)
+		desc->cond_suspend_depth++;
 
 	WARN_ON_ONCE(desc->no_suspend_depth &&
-		     desc->no_suspend_depth != desc->nr_actions);
+		     (desc->no_suspend_depth +
+			desc->cond_suspend_depth) != desc->nr_actions);
 }
 
 /*
@@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
 
 	if (action->flags & IRQF_NO_SUSPEND)
 		desc->no_suspend_depth--;
+	else if (action->flags & IRQF_COND_SUSPEND)
+		desc->cond_suspend_depth--;
 }
 
 static bool suspend_device_irq(struct irq_desc *desc, int irq)
-- 
cgit v1.2.3


From 40eeb111d7c88bfbc38e1dfe330bc4cec05e0806 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 5 Mar 2015 10:08:14 +0100
Subject: Revert "pinctrl: consumer: use correct retval for placeholder
 functions"

This reverts commit 5a7d2efdd93f6c4bb6cd3d5df3d2f5611c9b87ac.

As per discussion on the mailing list, this is not the right
thing to do. NULL cookies are valid in the stubs.

Reported-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/consumer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 72c0415d6c21..18eccefea06e 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -82,7 +82,7 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio)
 
 static inline struct pinctrl * __must_check pinctrl_get(struct device *dev)
 {
-	return ERR_PTR(-ENOSYS);
+	return NULL;
 }
 
 static inline void pinctrl_put(struct pinctrl *p)
@@ -93,7 +93,7 @@ static inline struct pinctrl_state * __must_check pinctrl_lookup_state(
 							struct pinctrl *p,
 							const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return NULL;
 }
 
 static inline int pinctrl_select_state(struct pinctrl *p,
@@ -104,7 +104,7 @@ static inline int pinctrl_select_state(struct pinctrl *p,
 
 static inline struct pinctrl * __must_check devm_pinctrl_get(struct device *dev)
 {
-	return ERR_PTR(-ENOSYS);
+	return NULL;
 }
 
 static inline void devm_pinctrl_put(struct pinctrl *p)
-- 
cgit v1.2.3


From 8603e1b30027f943cc9c1eef2b291d42c3347af1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 5 Mar 2015 08:04:13 -0500
Subject: workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s
 for PREEMPT_NONE

cancel[_delayed]_work_sync() are implemented using
__cancel_work_timer() which grabs the PENDING bit using
try_to_grab_pending() and then flushes the work item with PENDING set
to prevent the on-going execution of the work item from requeueing
itself.

try_to_grab_pending() can always grab PENDING bit without blocking
except when someone else is doing the above flushing during
cancelation.  In that case, try_to_grab_pending() returns -ENOENT.  In
this case, __cancel_work_timer() currently invokes flush_work().  The
assumption is that the completion of the work item is what the other
canceling task would be waiting for too and thus waiting for the same
condition and retrying should allow forward progress without excessive
busy looping

Unfortunately, this doesn't work if preemption is disabled or the
latter task has real time priority.  Let's say task A just got woken
up from flush_work() by the completion of the target work item.  If,
before task A starts executing, task B gets scheduled and invokes
__cancel_work_timer() on the same work item, its try_to_grab_pending()
will return -ENOENT as the work item is still being canceled by task A
and flush_work() will also immediately return false as the work item
is no longer executing.  This puts task B in a busy loop possibly
preventing task A from executing and clearing the canceling state on
the work item leading to a hang.

task A			task B			worker

						executing work
__cancel_work_timer()
  try_to_grab_pending()
  set work CANCELING
  flush_work()
    block for work completion
						completion, wakes up A
			__cancel_work_timer()
			while (forever) {
			  try_to_grab_pending()
			    -ENOENT as work is being canceled
			  flush_work()
			    false as work is no longer executing
			}

This patch removes the possible hang by updating __cancel_work_timer()
to explicitly wait for clearing of CANCELING rather than invoking
flush_work() after try_to_grab_pending() fails with -ENOENT.

Link: http://lkml.kernel.org/g/20150206171156.GA8942@axis.com

v3: bit_waitqueue() can't be used for work items defined in vmalloc
    area.  Switched to custom wake function which matches the target
    work item and exclusive wait and wakeup.

v2: v1 used wake_up() on bit_waitqueue() which leads to NULL deref if
    the target bit waitqueue has wait_bit_queue's on it.  Use
    DEFINE_WAIT_BIT() and __wake_up_bit() instead.  Reported by Tomeu
    Vizoso.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Rabin Vincent <rabin.vincent@axis.com>
Cc: Tomeu Vizoso <tomeu.vizoso@gmail.com>
Cc: stable@vger.kernel.org
Tested-by: Jesper Nilsson <jesper.nilsson@axis.com>
Tested-by: Rabin Vincent <rabin.vincent@axis.com>
---
 include/linux/workqueue.h |  3 ++-
 kernel/workqueue.c        | 56 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 74db135f9957..f597846ff605 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -70,7 +70,8 @@ enum {
 	/* data contains off-queue information when !WORK_STRUCT_PWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_COLOR_SHIFT,
 
-	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
+	__WORK_OFFQ_CANCELING	= WORK_OFFQ_FLAG_BASE,
+	WORK_OFFQ_CANCELING	= (1 << __WORK_OFFQ_CANCELING),
 
 	/*
 	 * When a work item is off queue, its high bits point to the last
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f28849394791..41ff75b478c6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+	wait_queue_t		wait;
+	struct work_struct	*work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+	if (cwait->work != key)
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
 	unsigned long flags;
 	int ret;
 
 	do {
 		ret = try_to_grab_pending(work, is_dwork, &flags);
 		/*
-		 * If someone else is canceling, wait for the same event it
-		 * would be waiting for before retrying.
+		 * If someone else is already canceling, wait for it to
+		 * finish.  flush_work() doesn't work for PREEMPT_NONE
+		 * because we may get scheduled between @work's completion
+		 * and the other canceling task resuming and clearing
+		 * CANCELING - flush_work() will return false immediately
+		 * as @work is no longer busy, try_to_grab_pending() will
+		 * return -ENOENT as @work is still being canceled and the
+		 * other canceling task won't be able to clear CANCELING as
+		 * we're hogging the CPU.
+		 *
+		 * Let's wait for completion using a waitqueue.  As this
+		 * may lead to the thundering herd problem, use a custom
+		 * wake function which matches @work along with exclusive
+		 * wait and wakeup.
 		 */
-		if (unlikely(ret == -ENOENT))
-			flush_work(work);
+		if (unlikely(ret == -ENOENT)) {
+			struct cwt_wait cwait;
+
+			init_wait(&cwait.wait);
+			cwait.wait.func = cwt_wakefn;
+			cwait.work = work;
+
+			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+						  TASK_UNINTERRUPTIBLE);
+			if (work_is_canceling(work))
+				schedule();
+			finish_wait(&cancel_waitq, &cwait.wait);
+		}
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
 	flush_work(work);
 	clear_work_data(work);
+
+	/*
+	 * Paired with prepare_to_wait() above so that either
+	 * waitqueue_active() is visible here or !work_is_canceling() is
+	 * visible there.
+	 */
+	smp_mb();
+	if (waitqueue_active(&cancel_waitq))
+		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From ef2b22ac540c018bd574d1846ab95b9bfcf38702 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 2 Mar 2015 22:26:55 +0100
Subject: cpuidle / sleep: Use broadcast timer for states that stop local timer

Commit 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
overlooked the fact that entering some sufficiently deep idle states
by CPUs may cause their local timers to stop and in those cases it
is necessary to switch over to a broadcast timer prior to entering
the idle state.  If the cpuidle driver in use does not provide
the new ->enter_freeze callback for any of the idle states, that
problem affects suspend-to-idle too, but it is not taken into account
after the changes made by commit 381063133246.

Fix that by changing the definition of cpuidle_enter_freeze() and
re-arranging of the code in cpuidle_idle_call(), so the former does
not call cpuidle_enter() any more and the fallback case is handled
by cpuidle_idle_call() directly.

Fixes: 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
Reported-and-tested-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/cpuidle/cpuidle.c | 62 +++++++++++++++++------------------------------
 include/linux/cpuidle.h   | 17 +++++++++++--
 kernel/sched/idle.c       | 30 ++++++++++++++++-------
 3 files changed, 58 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8b3e132b6a01..080bd2dbde4b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -44,8 +44,8 @@ void disable_cpuidle(void)
 	off = 1;
 }
 
-static bool cpuidle_not_available(struct cpuidle_driver *drv,
-				  struct cpuidle_device *dev)
+bool cpuidle_not_available(struct cpuidle_driver *drv,
+			   struct cpuidle_device *dev)
 {
 	return off || !initialized || !drv || !dev || !dev->enabled;
 }
@@ -72,14 +72,8 @@ int cpuidle_play_dead(void)
 	return -ENODEV;
 }
 
-/**
- * cpuidle_find_deepest_state - Find deepest state meeting specific conditions.
- * @drv: cpuidle driver for the given CPU.
- * @dev: cpuidle device for the given CPU.
- * @freeze: Whether or not the state should be suitable for suspend-to-idle.
- */
-static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-				      struct cpuidle_device *dev, bool freeze)
+static int find_deepest_state(struct cpuidle_driver *drv,
+			      struct cpuidle_device *dev, bool freeze)
 {
 	unsigned int latency_req = 0;
 	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
@@ -98,6 +92,17 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 	return ret;
 }
 
+/**
+ * cpuidle_find_deepest_state - Find the deepest available idle state.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ */
+int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+			       struct cpuidle_device *dev)
+{
+	return find_deepest_state(drv, dev, false);
+}
+
 static void enter_freeze_proper(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {
@@ -119,46 +124,26 @@ static void enter_freeze_proper(struct cpuidle_driver *drv,
 
 /**
  * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
  *
  * If there are states with the ->enter_freeze callback, find the deepest of
- * them and enter it with frozen tick.  Otherwise, find the deepest state
- * available and enter it normally.
- *
- * Returns with enabled interrupts.
+ * them and enter it with frozen tick.
  */
-void cpuidle_enter_freeze(void)
+int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int index;
 
-	if (cpuidle_not_available(drv, dev))
-		goto fallback;
-
 	/*
 	 * Find the deepest state with ->enter_freeze present, which guarantees
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = cpuidle_find_deepest_state(drv, dev, true);
-	if (index >= 0) {
+	index = find_deepest_state(drv, dev, true);
+	if (index >= 0)
 		enter_freeze_proper(drv, dev, index);
-		local_irq_enable();
-		return;
-	}
 
-	/*
-	 * It is not safe to freeze the tick, find the deepest state available
-	 * at all and try to enter it normally.
-	 */
-	index = cpuidle_find_deepest_state(drv, dev, false);
-	if (index >= 0) {
-		cpuidle_enter(drv, dev, index);
-		return;
-	}
-
- fallback:
-	arch_cpu_idle();
+	return index;
 }
 
 /**
@@ -217,9 +202,6 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  */
 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-	if (cpuidle_not_available(drv, dev))
-		return -ENODEV;
-
 	return cpuidle_curr_governor->select(drv, dev);
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index f551a9299ac9..306178d7309f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -126,6 +126,8 @@ struct cpuidle_driver {
 
 #ifdef CONFIG_CPU_IDLE
 extern void disable_cpuidle(void);
+extern bool cpuidle_not_available(struct cpuidle_driver *drv,
+				  struct cpuidle_device *dev);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
 			  struct cpuidle_device *dev);
@@ -150,11 +152,17 @@ extern void cpuidle_resume(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 extern int cpuidle_play_dead(void);
-extern void cpuidle_enter_freeze(void);
+extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+				      struct cpuidle_device *dev);
+extern int cpuidle_enter_freeze(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev);
 
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
 static inline void disable_cpuidle(void) { }
+static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
+					 struct cpuidle_device *dev)
+{return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {return -ENODEV; }
@@ -183,7 +191,12 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
-static inline void cpuidle_enter_freeze(void) { }
+static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+					     struct cpuidle_device *dev)
+{return -ENODEV; }
+static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv,
+				       struct cpuidle_device *dev)
+{return -ENODEV; }
 static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 	struct cpuidle_device *dev) {return NULL; }
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 84b93b68482a..80014a178342 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -82,6 +82,7 @@ static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 	unsigned int broadcast;
+	bool reflect;
 
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
@@ -105,6 +106,9 @@ static void cpuidle_idle_call(void)
 	 */
 	rcu_idle_enter();
 
+	if (cpuidle_not_available(drv, dev))
+		goto use_default;
+
 	/*
 	 * Suspend-to-idle ("freeze") is a system state in which all user space
 	 * has been frozen, all I/O devices have been suspended and the only
@@ -115,15 +119,22 @@ static void cpuidle_idle_call(void)
 	 * until a proper wakeup interrupt happens.
 	 */
 	if (idle_should_freeze()) {
-		cpuidle_enter_freeze();
-		goto exit_idle;
-	}
+		entered_state = cpuidle_enter_freeze(drv, dev);
+		if (entered_state >= 0) {
+			local_irq_enable();
+			goto exit_idle;
+		}
 
-	/*
-	 * Ask the cpuidle framework to choose a convenient idle state.
-	 * Fall back to the default arch idle method on errors.
-	 */
-	next_state = cpuidle_select(drv, dev);
+		reflect = false;
+		next_state = cpuidle_find_deepest_state(drv, dev);
+	} else {
+		reflect = true;
+		/*
+		 * Ask the cpuidle framework to choose a convenient idle state.
+		 */
+		next_state = cpuidle_select(drv, dev);
+	}
+	/* Fall back to the default arch idle method on errors. */
 	if (next_state < 0)
 		goto use_default;
 
@@ -170,7 +181,8 @@ static void cpuidle_idle_call(void)
 	/*
 	 * Give the governor an opportunity to reflect on the outcome
 	 */
-	cpuidle_reflect(dev, entered_state);
+	if (reflect)
+		cpuidle_reflect(dev, entered_state);
 
 exit_idle:
 	__current_set_polling();
-- 
cgit v1.2.3


From 2bb785169e9709d41220e5c18b0270883a82f85c Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 1 Mar 2015 10:18:16 -0500
Subject: serial: core: Fix iotype userspace breakage

commit 3ffb1a8193bea ("serial: core: Add big-endian iotype")
re-numbered userspace-dependent values; ioctl(TIOCSSERIAL) can
assign the port iotype (which is expected to match the selected
i/o accessors), so iotype values must not be changed.

Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: <stable@vger.kernel.org> # 3.19+
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Reviewed-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index baf3e1d08416..1094f2d9cadb 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -147,9 +147,9 @@ struct uart_port {
 #define UPIO_HUB6		(1)			/* Hub6 ISA card */
 #define UPIO_MEM		(2)			/* 8b MMIO access */
 #define UPIO_MEM32		(3)			/* 32b little endian */
-#define UPIO_MEM32BE		(4)			/* 32b big endian */
-#define UPIO_AU			(5)			/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(6)			/* Tsi108/109 type IO */
+#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
+#define UPIO_TSI		(5)			/* Tsi108/109 type IO */
+#define UPIO_MEM32BE		(6)			/* 32b big endian */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
-- 
cgit v1.2.3


From 647f162b8e7e446c4bade031eb8a1a0a83d3de82 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 1 Mar 2015 10:24:28 -0500
Subject: serial: uapi: Declare all userspace-visible io types

ioctl(TIOCGSERIAL|TIOCSSERIAL) report and can change the port->iotype.
UART drivers use the UPIO_* definitions, but the uapi header defines
parallel values and userspace uses these parallel values for ioctls;
thus the userspace values are definitive.

Define UPIO_* iotypes in terms of the uapi defines, SERIAL_IO_*;
extend the uapi defines to include all values in use by the serial
core.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 14 +++++++-------
 include/uapi/linux/serial.h |  4 ++++
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1094f2d9cadb..d10965f0d8a4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -143,13 +143,13 @@ struct uart_port {
 	unsigned char		iotype;			/* io access style */
 	unsigned char		unused1;
 
-#define UPIO_PORT		(0)			/* 8b I/O port access */
-#define UPIO_HUB6		(1)			/* Hub6 ISA card */
-#define UPIO_MEM		(2)			/* 8b MMIO access */
-#define UPIO_MEM32		(3)			/* 32b little endian */
-#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(5)			/* Tsi108/109 type IO */
-#define UPIO_MEM32BE		(6)			/* 32b big endian */
+#define UPIO_PORT		(SERIAL_IO_PORT)	/* 8b I/O port access */
+#define UPIO_HUB6		(SERIAL_IO_HUB6)	/* Hub6 ISA card */
+#define UPIO_MEM		(SERIAL_IO_MEM)		/* 8b MMIO access */
+#define UPIO_MEM32		(SERIAL_IO_MEM32)	/* 32b little endian */
+#define UPIO_AU			(SERIAL_IO_AU)		/* Au1x00 and RT288x type IO */
+#define UPIO_TSI		(SERIAL_IO_TSI)		/* Tsi108/109 type IO */
+#define UPIO_MEM32BE		(SERIAL_IO_MEM32BE)	/* 32b big endian */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index 5e0d0ed61cf3..25331f9faa76 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -65,6 +65,10 @@ struct serial_struct {
 #define SERIAL_IO_PORT	0
 #define SERIAL_IO_HUB6	1
 #define SERIAL_IO_MEM	2
+#define SERIAL_IO_MEM32	  3
+#define SERIAL_IO_AU	  4
+#define SERIAL_IO_TSI	  5
+#define SERIAL_IO_MEM32BE 6
 
 #define UART_CLEAR_FIFO		0x01
 #define UART_USE_FIFO		0x02
-- 
cgit v1.2.3


From f54b97ed0b17d3da5f98ba8188cd5646415a922d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 6 Mar 2015 16:37:41 +0000
Subject: irqchip: gicv3-its: Allocate enough memory for the full range of
 DeviceID

The ITS table allocator is only allocating a single page per table.
This works fine for most things, but leads to silent lack of
interrupt delivery if we end-up with a device that has an ID that is
out of the range defined by a single page of memory. Even worse, depending
on the page size, behaviour changes, which is not a very good experience.

A solution is actually to allocate memory for the full range of ID that
the ITS supports. A massive waste memory wise, but at least a safe bet.

Tested on a Phytium SoC.

Tested-by: Chen Baozi <chenbaozi@kylinos.com.cn>
Acked-by: Chen Baozi <chenbaozi@kylinos.com.cn>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1425659870-11832-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   | 25 +++++++++++++++++++++----
 include/linux/irqchip/arm-gic-v3.h |  2 ++
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c217ebcf7a48..733b32fda390 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -806,14 +806,31 @@ static int its_alloc_tables(struct its_node *its)
 		u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+		int order = 0;
+		int alloc_size;
 		u64 tmp;
 		void *base;
 
 		if (type == GITS_BASER_TYPE_NONE)
 			continue;
 
-		/* We're lazy and only allocate a single page for now */
-		base = (void *)get_zeroed_page(GFP_KERNEL);
+		/*
+		 * Allocate as many entries as required to fit the
+		 * range of device IDs that the ITS can grok... The ID
+		 * space being incredibly sparse, this results in a
+		 * massive waste of memory.
+		 *
+		 * For other tables, only allocate a single page.
+		 */
+		if (type == GITS_BASER_TYPE_DEVICE) {
+			u64 typer = readq_relaxed(its->base + GITS_TYPER);
+			u32 ids = GITS_TYPER_DEVBITS(typer);
+
+			order = get_order((1UL << ids) * entry_size);
+		}
+
+		alloc_size = (1 << order) * PAGE_SIZE;
+		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (!base) {
 			err = -ENOMEM;
 			goto out_free;
@@ -841,7 +858,7 @@ retry_baser:
 			break;
 		}
 
-		val |= (PAGE_SIZE / psz) - 1;
+		val |= (alloc_size / psz) - 1;
 
 		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
 		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -882,7 +899,7 @@ retry_baser:
 		}
 
 		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(PAGE_SIZE / entry_size),
+			(int)(alloc_size / entry_size),
 			its_base_type_string[type],
 			(unsigned long)virt_to_phys(base),
 			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 800544bc7bfd..cbdd440d486d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -166,6 +166,8 @@
 
 #define GITS_TRANSLATER			0x10040
 
+#define GITS_TYPER_DEVBITS_SHIFT	13
+#define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
 
 #define GITS_CBASER_VALID		(1UL << 63)
-- 
cgit v1.2.3


From 7cb991164a46992a499ecdc77b17f8ac94bdb75f Mon Sep 17 00:00:00 2001
From: Yun Wu <wuyun.wu@huawei.com>
Date: Fri, 6 Mar 2015 16:37:49 +0000
Subject: irqchip: gicv3-its: Define macros for GITS_CTLR fields

Define macros for GITS_CTLR fields to avoid using magic numbers.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Yun Wu <wuyun.wu@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1425659870-11832-11-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   | 2 +-
 include/linux/irqchip/arm-gic-v3.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ec20d4a942e0..826da706be4b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1388,7 +1388,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 	writeq_relaxed(baser, its->base + GITS_CBASER);
 	tmp = readq_relaxed(its->base + GITS_CBASER);
 	writeq_relaxed(0, its->base + GITS_CWRITER);
-	writel_relaxed(1, its->base + GITS_CTLR);
+	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
 	if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
 		pr_info("ITS: using cache flushing for cmd queue\n");
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbdd440d486d..781974afff9f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -166,6 +166,9 @@
 
 #define GITS_TRANSLATER			0x10040
 
+#define GITS_CTLR_ENABLE		(1U << 0)
+#define GITS_CTLR_QUIESCENT		(1U << 31)
+
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
-- 
cgit v1.2.3


From 3d3801effda19b21012b5d1981e96cc277df85fd Mon Sep 17 00:00:00 2001
From: Michael Turquette <mturquette@linaro.org>
Date: Wed, 25 Feb 2015 09:11:01 -0800
Subject: clk: introduce clk_is_match

Some drivers compare struct clk pointers as a means of knowing
if the two pointers reference the same clock hardware. This behavior is
dubious (drivers must not dereference struct clk), but did not cause any
regressions until the per-user struct clk patch was merged. Now the test
for matching clk's will always fail with per-user struct clk's.

clk_is_match is introduced to fix the regression and prevent drivers
from comparing the pointers manually.

Fixes: 035a61c314eb ("clk: Make clk API return per-user struct clk instances")
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Signed-off-by: Michael Turquette <mturquette@linaro.org>
[arnd@arndb.de: Fix COMMON_CLK=N && HAS_CLK=Y config]
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[sboyd@codeaurora.org: const arguments to clk_is_match() and
remove unnecessary ternary operation]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk.c   | 26 ++++++++++++++++++++++++++
 include/linux/clk.h | 18 ++++++++++++++++++
 2 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b9f85fc2ce3f..237f23f68bfc 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2169,6 +2169,32 @@ int clk_get_phase(struct clk *clk)
 	return clk_core_get_phase(clk->core);
 }
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+	/* trivial case: identical struct clk's or both NULL */
+	if (p == q)
+		return true;
+
+	/* true if clk->core pointers match. Avoid derefing garbage */
+	if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q))
+		if (p->core == q->core)
+			return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(clk_is_match);
+
 /**
  * __clk_init - initialize the data structures in a struct clk
  * @dev:	device initializing this clk, placeholder for now
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 8381bbfbc308..68c16a6bedb3 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -125,6 +125,19 @@ int clk_set_phase(struct clk *clk, int degrees);
  */
 int clk_get_phase(struct clk *clk);
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q);
+
 #else
 
 static inline long clk_get_accuracy(struct clk *clk)
@@ -142,6 +155,11 @@ static inline long clk_get_phase(struct clk *clk)
 	return -ENOTSUPP;
 }
 
+static inline bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+	return p == q;
+}
+
 #endif
 
 /**
-- 
cgit v1.2.3


From c29390c6dfeee0944ac6b5610ebbe403944378fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Mar 2015 18:42:02 -0700
Subject: xps: must clear sender_cpu before forwarding

John reported that my previous commit added a regression
on his router.

This is because sender_cpu & napi_id share a common location,
so get_xps_queue() can see garbage and perform an out of bound access.

We need to make sure sender_cpu is cleared before doing the transmit,
otherwise any NIC busy poll enabled (skb_mark_napi_id()) can trigger
this bug.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: John <jw@nuclearfallout.net>
Bisected-by: John <jw@nuclearfallout.net>
Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/skbuff.c      | 2 +-
 net/ipv4/ip_forward.c  | 1 +
 net/ipv6/ip6_output.c  | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 30007afe70b3..f54d6659713a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 	to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_sender_cpu_clear(struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	skb->sender_cpu = 0;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f80507823531..434e78e5254d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4173,7 +4173,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
-	skb->sender_cpu = 0;
+	skb_sender_cpu_clear(skb);
 	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 787b3c294ce6..d9bc28ac5d1b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0a04a37305d5..7e80b61b51ff 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
-- 
cgit v1.2.3


From a5af5aa8b67dfdba36c853b70564fd2dfe73d478 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <a.ryabinin@samsung.com>
Date: Thu, 12 Mar 2015 16:26:11 -0700
Subject: kasan, module, vmalloc: rework shadow allocation for modules

Current approach in handling shadow memory for modules is broken.

Shadow memory could be freed only after memory shadow corresponds it is no
longer used.  vfree() called from interrupt context could use memory its
freeing to store 'struct llist_node' in it:

    void vfree(const void *addr)
    {
    ...
        if (unlikely(in_interrupt())) {
            struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
            if (llist_add((struct llist_node *)addr, &p->list))
                    schedule_work(&p->wq);

Later this list node used in free_work() which actually frees memory.
Currently module_memfree() called in interrupt context will free shadow
before freeing module's memory which could provoke kernel crash.

So shadow memory should be freed after module's memory.  However, such
deallocation order could race with kasan_module_alloc() in module_alloc().

Free shadow right before releasing vm area.  At this point vfree()'d
memory is not used anymore and yet not available for other allocations.
New VM_KASAN flag used to indicate that vm area has dynamically allocated
shadow memory so kasan frees shadow only if it was previously allocated.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h   |  5 +++--
 include/linux/vmalloc.h |  1 +
 kernel/module.c         |  2 --
 mm/kasan/kasan.c        | 14 +++++++++++---
 mm/vmalloc.c            |  1 +
 5 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 72ba725ddf9c..5fa48a21d73e 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -5,6 +5,7 @@
 
 struct kmem_cache;
 struct page;
+struct vm_struct;
 
 #ifdef CONFIG_KASAN
 
@@ -52,7 +53,7 @@ void kasan_slab_free(struct kmem_cache *s, void *object);
 #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
 
 int kasan_module_alloc(void *addr, size_t size);
-void kasan_module_free(void *addr);
+void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* CONFIG_KASAN */
 
@@ -82,7 +83,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
 static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_module_free(void *addr) {}
+static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
 #endif /* CONFIG_KASAN */
 
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 7d7acb35603d..0ec598381f97 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -17,6 +17,7 @@ struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 #define VM_VPAGES		0x00000010	/* buffer for pages was vmalloc'ed */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x00000040      /* don't add guard page */
+#define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
diff --git a/kernel/module.c b/kernel/module.c
index cc93cf68653c..b3d634ed06c9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,7 +56,6 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
-#include <linux/kasan.h>
 #include <linux/jump_label.h>
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
@@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
 void __weak module_memfree(void *module_region)
 {
 	vfree(module_region);
-	kasan_module_free(module_region);
 }
 
 void __weak module_arch_cleanup(struct module *mod)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 78fee632a7ee..936d81661c47 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -29,6 +29,7 @@
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <linux/kasan.h>
 
 #include "kasan.h"
@@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size)
 			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
 			PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
 			__builtin_return_address(0));
-	return ret ? 0 : -ENOMEM;
+
+	if (ret) {
+		find_vm_area(addr)->flags |= VM_KASAN;
+		return 0;
+	}
+
+	return -ENOMEM;
 }
 
-void kasan_module_free(void *addr)
+void kasan_free_shadow(const struct vm_struct *vm)
 {
-	vfree(kasan_mem_to_shadow(addr));
+	if (vm->flags & VM_KASAN)
+		vfree(kasan_mem_to_shadow(vm->addr));
 }
 
 static void register_global(struct kasan_global *global)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35b25e1340ca..49abccf29a29 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1418,6 +1418,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 		spin_unlock(&vmap_area_lock);
 
 		vmap_debug_free_range(va->va_start, va->va_end);
+		kasan_free_shadow(vm);
 		free_unmap_vmap_area(va);
 		vm->size -= PAGE_SIZE;
 
-- 
cgit v1.2.3


From d3733e5c98e952d419e77fa721912f09d15a2806 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <a.ryabinin@samsung.com>
Date: Thu, 12 Mar 2015 16:26:14 -0700
Subject: kasan, module: move MODULE_ALIGN macro into <linux/moduleloader.h>

include/linux/moduleloader.h is more suitable place for this macro.
Also change alignment to PAGE_SIZE for CONFIG_KASAN=n as such
alignment already assumed in several places.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h        | 4 ----
 include/linux/moduleloader.h | 8 ++++++++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5fa48a21d73e..5bb074431eb0 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -50,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size);
 void kasan_slab_alloc(struct kmem_cache *s, void *object);
 void kasan_slab_free(struct kmem_cache *s, void *object);
 
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
-
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* CONFIG_KASAN */
 
-#define MODULE_ALIGN 1
-
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_enable_current(void) {}
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index f7556261fe3c..4d0cb9bba93e 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -84,4 +84,12 @@ void module_arch_cleanup(struct module *mod);
 
 /* Any cleanup before freeing mod->module_init */
 void module_arch_freeing_init(struct module *mod);
+
+#ifdef CONFIG_KASAN
+#include <linux/kasan.h>
+#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define MODULE_ALIGN PAGE_SIZE
+#endif
+
 #endif
-- 
cgit v1.2.3


From a697c2efba03ac7bfdbffbba7f0f1aa294f7dee0 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 10 Mar 2015 20:31:04 -0700
Subject: of/platform: Fix sparc:allmodconfig build

sparc:allmodconfig fails to build with:

drivers/built-in.o: In function `platform_bus_init':
(.init.text+0x3684): undefined reference to `of_platform_register_reconfig_notifier'

of_platform_register_reconfig_notifier is only declared if both OF_ADDRESS
and OF_DYNAMIC are configured. Yet, the include file only declares a dummy
function if OF_DYNAMIC is not configured. The sparc architecture does not
configure OF_ADDRESS, but does configure OF_DYNAMIC, causing above error.

Fixes: 801d728c10db ("of/reconfig: Add OF_DYNAMIC notifier for platform_bus_type")
Cc: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 8a860f096c35..611a691145c4 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -84,7 +84,7 @@ static inline int of_platform_populate(struct device_node *root,
 static inline void of_platform_depopulate(struct device *parent) { }
 #endif
 
-#ifdef CONFIG_OF_DYNAMIC
+#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS)
 extern void of_platform_register_reconfig_notifier(void);
 #else
 static inline void of_platform_register_reconfig_notifier(void) { }
-- 
cgit v1.2.3


From 8cb2c2dc472775479a1a7e78180955f6f1cb0b0a Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.cz>
Date: Thu, 12 Mar 2015 12:55:13 +0100
Subject: livepatch: Fix subtle race with coming and going modules

There is a notifier that handles live patches for coming and going modules.
It takes klp_mutex lock to avoid races with coming and going patches but
it does not keep the lock all the time. Therefore the following races are
possible:

  1. The notifier is called sometime in STATE_MODULE_COMING. The module
     is visible by find_module() in this state all the time. It means that
     new patch can be registered and enabled even before the notifier is
     called. It might create wrong order of stacked patches, see below
     for an example.

   2. New patch could still see the module in the GOING state even after
      the notifier has been called. It will try to initialize the related
      object structures but the module could disappear at any time. There
      will stay mess in the structures. It might even cause an invalid
      memory access.

This patch solves the problem by adding a boolean variable into struct module.
The value is true after the coming and before the going handler is called.
New patches need to be applied when the value is true and they need to ignore
the module when the value is false.

Note that we need to know state of all modules on the system. The races are
related to new patches. Therefore we do not know what modules will get
patched.

Also note that we could not simply ignore going modules. The code from the
module could be called even in the GOING state until mod->exit() finishes.
If we start supporting patches with semantic changes between function
calls, we need to apply new patches to any still usable code.
See below for an example.

Finally note that the patch solves only the situation when a new patch is
registered. There are no such problems when the patch is being removed.
It does not matter who disable the patch first, whether the normal
disable_patch() or the module notifier. There is nothing to do
once the patch is disabled.

Alternative solutions:
======================

+ reject new patches when a patched module is coming or going; this is ugly

+ wait with adding new patch until the module leaves the COMING and GOING
  states; this might be dangerous and complicated; we would need to release
  kgr_lock in the middle of the patch registration to avoid a deadlock
  with the coming and going handlers; also we might need a waitqueue for
  each module which seems to be even bigger overhead than the boolean

+ stop modules from entering COMING and GOING states; wait until modules
  leave these states when they are already there; looks complicated; we would
  need to ignore the module that asked to stop the others to avoid a deadlock;
  also it is unclear what to do when two modules asked to stop others and
  both are in COMING state (situation when two new patches are applied)

+ always register/enable new patches and fix up the potential mess (registered
  patches order) in klp_module_init(); this is nasty and prone to regressions
  in the future development

+ add another MODULE_STATE where the kallsyms are visible but the module is not
  used yet; this looks too complex; the module states are checked on "many"
  locations

Example of patch stacking breakage:
===================================

The notifier could _not_ _simply_ ignore already initialized module objects.
For example, let's have three patches (P1, P2, P3) for functions a() and b()
where a() is from vmcore and b() is from a module M. Something like:

	a()	b()
P1	a1()	b1()
P2	a2()	b2()
P3	a3()	b3(3)

If you load the module M after all patches are registered and enabled.
The ftrace ops for function a() and b() has listed the functions in this
order:

	ops_a->func_stack -> list(a3,a2,a1)
	ops_b->func_stack -> list(b3,b2,b1)

, so the pointer to b3() is the first and will be used.

Then you might have the following scenario. Let's start with state when patches
P1 and P2 are registered and enabled but the module M is not loaded. Then ftrace
ops for b() does not exist. Then we get into the following race:

CPU0					CPU1

load_module(M)

  complete_formation()

  mod->state = MODULE_STATE_COMING;
  mutex_unlock(&module_mutex);

					klp_register_patch(P3);
					klp_enable_patch(P3);

					# STATE 1

  klp_module_notify(M)
    klp_module_notify_coming(P1);
    klp_module_notify_coming(P2);
    klp_module_notify_coming(P3);

					# STATE 2

The ftrace ops for a() and b() then looks:

  STATE1:

	ops_a->func_stack -> list(a3,a2,a1);
	ops_b->func_stack -> list(b3);

  STATE2:
	ops_a->func_stack -> list(a3,a2,a1);
	ops_b->func_stack -> list(b2,b1,b3);

therefore, b2() is used for the module but a3() is used for vmcore
because they were the last added.

Example of the race with going modules:
=======================================

CPU0					CPU1

delete_module()  #SYSCALL

   try_stop_module()
     mod->state = MODULE_STATE_GOING;

   mutex_unlock(&module_mutex);

					klp_register_patch()
					klp_enable_patch()

					#save place to switch universe

					b()     # from module that is going
					  a()   # from core (patched)

   mod->exit();

Note that the function b() can be called until we call mod->exit().

If we do not apply patch against b() because it is in MODULE_STATE_GOING,
it will call patched a() with modified semantic and things might get wrong.

[jpoimboe@redhat.com: use one boolean instead of two]
Signed-off-by: Petr Mladek <pmladek@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/module.h  |  4 ++++
 kernel/livepatch/core.c | 30 ++++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index b653d7c0a05a..7232fde6a991 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -344,6 +344,10 @@ struct module {
 	unsigned long *ftrace_callsites;
 #endif
 
+#ifdef CONFIG_LIVEPATCH
+	bool klp_alive;
+#endif
+
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
 	struct list_head source_list;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 01ca08804f51..3f9f1d6b4c2e 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -89,16 +89,28 @@ static bool klp_is_object_loaded(struct klp_object *obj)
 /* sets obj->mod if object is not vmlinux and module is found */
 static void klp_find_object_module(struct klp_object *obj)
 {
+	struct module *mod;
+
 	if (!klp_is_module(obj))
 		return;
 
 	mutex_lock(&module_mutex);
 	/*
-	 * We don't need to take a reference on the module here because we have
-	 * the klp_mutex, which is also taken by the module notifier.  This
-	 * prevents any module from unloading until we release the klp_mutex.
+	 * We do not want to block removal of patched modules and therefore
+	 * we do not take a reference here. The patches are removed by
+	 * a going module handler instead.
+	 */
+	mod = find_module(obj->name);
+	/*
+	 * Do not mess work of the module coming and going notifiers.
+	 * Note that the patch might still be needed before the going handler
+	 * is called. Module functions can be called even in the GOING state
+	 * until mod->exit() finishes. This is especially important for
+	 * patches that modify semantic of the functions.
 	 */
-	obj->mod = find_module(obj->name);
+	if (mod && mod->klp_alive)
+		obj->mod = mod;
+
 	mutex_unlock(&module_mutex);
 }
 
@@ -767,6 +779,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
 		return -EINVAL;
 
 	obj->state = KLP_DISABLED;
+	obj->mod = NULL;
 
 	klp_find_object_module(obj);
 
@@ -961,6 +974,15 @@ static int klp_module_notify(struct notifier_block *nb, unsigned long action,
 
 	mutex_lock(&klp_mutex);
 
+	/*
+	 * Each module has to know that the notifier has been called.
+	 * We never know what module will get patched by a new patch.
+	 */
+	if (action == MODULE_STATE_COMING)
+		mod->klp_alive = true;
+	else /* MODULE_STATE_GOING */
+		mod->klp_alive = false;
+
 	list_for_each_entry(patch, &klp_patches, list) {
 		for (obj = patch->objs; obj->funcs; obj++) {
 			if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
-- 
cgit v1.2.3


From e03826d5045e81a66a4fad7be9a8ecdaeb7911cf Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 17 Mar 2015 15:56:04 +0530
Subject: regulator: palmas: Correct TPS659038 register definition for REGEN2

The register offset for REGEN2_CTRL in different for TPS659038 chip as when
compared with other Palmas family PMICs. In the case of TPS659038 the wrong
offset pointed to PLLEN_CTRL and was causing a hang. Correcting the same.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/regulator/palmas-regulator.c | 4 ++++
 include/linux/mfd/palmas.h           | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 9205f433573c..18198316b6cf 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
 	if (!pmic)
 		return -ENOMEM;
 
+	if (of_device_is_compatible(node, "ti,tps659038-pmic"))
+		palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
+							TPS659038_REGEN2_CTRL;
+
 	pmic->dev = &pdev->dev;
 	pmic->palmas = palmas;
 	palmas->pmic = pmic;
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index fb0390a1a498..ee7b1ce7a6f8 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -2999,6 +2999,9 @@ enum usb_irq_events {
 #define PALMAS_GPADC_TRIM15					0x0E
 #define PALMAS_GPADC_TRIM16					0x0F
 
+/* TPS659038 regen2_ctrl offset iss different from palmas */
+#define TPS659038_REGEN2_CTRL					0x12
+
 /* TPS65917 Interrupt registers */
 
 /* Registers for function INTERRUPT */
-- 
cgit v1.2.3


From ad41faa88e39af451427c921a0f8b441e104b6fa Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 17 Mar 2015 11:16:00 +0100
Subject: netdevice.h: fix ndo_bridge_* comments

The argument 'flags' was missing in ndo_bridge_setlink().
ndo_bridge_dellink() was missing.

Fixes: 407af3299ef1 ("bridge: Add netlink interface to configure vlans on bridge ports")
Fixes: add511b38266 ("bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink")
CC: Vlad Yasevich <vyasevic@redhat.com>
CC: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 429d1790a27e..dcf6ec27739b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -965,9 +965,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	Used to add FDB entries to dump requests. Implementers should add
  *	entries to skb and update idx with the number of entries.
  *
- * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
+ * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
+ *			     u16 flags)
  * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
  *			     struct net_device *dev, u32 filter_mask)
+ * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
+ *			     u16 flags);
  *
  * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
  *	Called to change device carrier. Soft-devices (like dummy, team, etc)
-- 
cgit v1.2.3


From cf39284d41f67964cf42b21bb386c012cf5b7f65 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 18 Mar 2015 08:57:41 +0800
Subject: regulator: Fix documentation for regmap in the config

dev_get_regulator() does not exist, fix the typo.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d4ad5b5a02bb..045f709cb89b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -316,7 +316,7 @@ struct regulator_desc {
  * @driver_data: private regulator data
  * @of_node: OpenFirmware node to parse for device tree bindings (may be
  *           NULL).
- * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is
+ * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
  *          insufficient.
  * @ena_gpio_initialized: GPIO controlling regulator enable was properly
  *                        initialized, meaning that >= 0 is a valid gpio
-- 
cgit v1.2.3


From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 12 Mar 2015 10:32:18 -0700
Subject: ata: Add a new flag to destinguish sas controller

SAS controller has its own tag allocation, which doesn't directly match to ATA
tag, so SAS and SATA have different code path for ata tags. Originally we use
port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set
->scsi_host too, so we can't use it for the destinguish, we add a new flag for
this purpose.

Without this patch, the following oops can happen because scsi-mq uses
a host-wide tag map shared among all devices with some integer tag
values >= ATA_MAX_QUEUE.  These unexpectedly high tag values cause
__ata_qc_from_tag() to return NULL, which is then dereferenced in
ata_qc_new_init().

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
  IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  PGD 32adf0067 PUD 32adf1067 PMD 0
  Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
  Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
  i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
  eeprom w83795 i2c_i801
  CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
  Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
  task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
  RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  RSP: 0018:ffff88032a067858  EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
  RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
  RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
  R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
  FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
  Stack:
   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
  Call Trace:
   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
   [<ffffffff80689932>] system_call_fastpath+0x12/0x17

tj: updated description.

Fixes: 12cb5ce101ab ("libata: use blk taging")
Reported-and-tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c     | 4 ++--
 drivers/scsi/ipr.c            | 3 ++-
 drivers/scsi/libsas/sas_ata.c | 3 ++-
 include/linux/libata.h        | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f0822d06..ef150ebb4c30 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 		return NULL;
 
 	/* libsas case */
-	if (!ap->scsi_host) {
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
 		tag = ata_sas_allocate_tag(ap);
 		if (tag < 0)
 			return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
+		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
 	}
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953ee949..d9afc51af7d3 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+			  ATA_FLAG_SAS_HOST,
 	.pio_mask	= ATA_PIO4_ONLY,
 	.mwdma_mask	= ATA_MWDMA2,
 	.udma_mask	= ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc98d2f..9c706d8c1441 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+		 ATA_FLAG_SAS_HOST,
 	.pio_mask = ATA_PIO4,
 	.mwdma_mask = ATA_MWDMA2,
 	.udma_mask = ATA_UDMA6,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa64ffe..6b08cc106c21 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
+	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
-- 
cgit v1.2.3


From 074c238177a75f5e79af3b2cb6a84e54823ef950 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 25 Mar 2015 15:55:42 -0700
Subject: mm: numa: slow PTE scan rate if migration failures occur

Dave Chinner reported the following on https://lkml.org/lkml/2015/3/1/226

  Across the board the 4.0-rc1 numbers are much slower, and the degradation
  is far worse when using the large memory footprint configs. Perf points
  straight at the cause - this is from 4.0-rc1 on the "-o bhash=101073" config:

   -   56.07%    56.07%  [kernel]            [k] default_send_IPI_mask_sequence_phys
      - default_send_IPI_mask_sequence_phys
         - 99.99% physflat_send_IPI_mask
            - 99.37% native_send_call_func_ipi
                 smp_call_function_many
               - native_flush_tlb_others
                  - 99.85% flush_tlb_page
                       ptep_clear_flush
                       try_to_unmap_one
                       rmap_walk
                       try_to_unmap
                       migrate_pages
                       migrate_misplaced_page
                     - handle_mm_fault
                        - 99.73% __do_page_fault
                             trace_do_page_fault
                             do_async_page_fault
                           + async_page_fault
              0.63% native_send_call_func_single_ipi
                 generic_exec_single
                 smp_call_function_single

This is showing excessive migration activity even though excessive
migrations are meant to get throttled.  Normally, the scan rate is tuned
on a per-task basis depending on the locality of faults.  However, if
migrations fail for any reason then the PTE scanner may scan faster if
the faults continue to be remote.  This means there is higher system CPU
overhead and fault trapping at exactly the time we know that migrations
cannot happen.  This patch tracks when migration failures occur and
slows the PTE scanner.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-by: Dave Chinner <david@fromorbit.com>
Tested-by: Dave Chinner <david@fromorbit.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 +++++----
 kernel/sched/fair.c   | 8 ++++++--
 mm/huge_memory.c      | 3 ++-
 mm/memory.c           | 3 ++-
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432e14ff..a419b65770d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1625,11 +1625,11 @@ struct task_struct {
 
 	/*
 	 * numa_faults_locality tracks if faults recorded during the last
-	 * scan window were remote/local. The task scan period is adapted
-	 * based on the locality of the faults with different weights
-	 * depending on whether they were shared or private faults
+	 * scan window were remote/local or failed to migrate. The task scan
+	 * period is adapted based on the locality of the faults with different
+	 * weights depending on whether they were shared or private faults
 	 */
-	unsigned long numa_faults_locality[2];
+	unsigned long numa_faults_locality[3];
 
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
@@ -1719,6 +1719,7 @@ struct task_struct {
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
 #define TNF_FAULT_LOCAL	0x08
+#define TNF_MIGRATE_FAIL 0x10
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7ce18f3c097a..bcfe32088b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p,
 	/*
 	 * If there were no record hinting faults then either the task is
 	 * completely idle or all activity is areas that are not of interest
-	 * to automatic numa balancing. Scan slower
+	 * to automatic numa balancing. Related to that, if there were failed
+	 * migration then it implies we are migrating too quickly or the local
+	 * node is overloaded. In either case, scan slower
 	 */
-	if (local + shared == 0) {
+	if (local + shared == 0 || p->numa_faults_locality[2]) {
 		p->numa_scan_period = min(p->numa_scan_period_max,
 			p->numa_scan_period << 1);
 
@@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 	if (migrated)
 		p->numa_pages_migrated += pages;
+	if (flags & TNF_MIGRATE_FAIL)
+		p->numa_faults_locality[2] += pages;
 
 	p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
 	p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a42d1521aa4..51b3e7c64622 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1350,7 +1350,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (migrated) {
 		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
-	}
+	} else
+		flags |= TNF_MIGRATE_FAIL;
 
 	goto out;
 clear_pmdnuma:
diff --git a/mm/memory.c b/mm/memory.c
index d20e12da3a3c..97839f5c8c30 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3103,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (migrated) {
 		page_nid = target_nid;
 		flags |= TNF_MIGRATED;
-	}
+	} else
+		flags |= TNF_MIGRATE_FAIL;
 
 out:
 	if (page_nid != -1)
-- 
cgit v1.2.3